Purpose

Can weather data help predict costs associated with routes?

Database Connections

DuckDB

Establish a DuckDB, embedded database connection.
duckdb_con <- dbConnect(duckdb::duckdb(
     config = list(max_memory = '24GB')), ":memory:")

Loading Custom Output Scripts

Tables

table building
# Table Theming Script ----
#' @description
#' This script provides functions to create and theme tables using the `gt` package.
#' It includes options for customizing colors, footnotes, and other stylistic elements.
#' 
# eval_palette ----
#' @description
#' A helper function to evaluate color palettes using the `paletteer` package.
#' @param pal_name The name of the palette to evaluate.
#' @param n The number of colors to generate (default is 10).
#' @param pal_type The type of palette ("c" for continuous, "d" for discrete, or "dynamic" for dynamic palettes).
#' @param direction The direction of the palette (e.g., 1 for normal, -1 for reversed).
#' 
#' @return A vector of colors corresponding to the specified palette.
#' 
#' @example
#' \dontrun{
#' colors <- eval_palette("ggsci::springfield_simpsons", n = 5, pal_type = "d")
#' }
#' @export 
eval_palette <- function(pal_name, n = 10, pal_type, direction = NULL) {
     if (pal_type == "c") {
          return(paletteer_c(pal_name, n, direction))
     } else if (pal_type == "d") {
          return(paletteer_d(pal_name, n, direction))
     } else if (pal_type == "dynamic") {
          return(paletteer_dynamic(pal_name, n, direction))
     }
}

# r_table_theming ----
#' @description
#' The main function to create and theme a table using the `gt` package.
#' @details
#' **Color Coding** Applies color palettes to specific columns or the entire table.
#' **Footnotes** Adds footnotes to specific columns or locations in the table.
#' **Column Labels** Customizes the appearance of column labels, including background colors.
#' **Table Styling** Applies various styling options, such as borders, padding, and font weights.
#' **Shadow Effects** Optionally adds shadow effects to table body cells.
#'
#' @param r_df The data frame to be converted into a table.
#' @param title The title of the table.
#' @param subtitle The subtitle of the table.
#' @param footnotes_df A data frame containing footnotes and their locations.
#' @param source_note A source note to be added at the bottom of the table.
#' @param pal_df A data frame containing color palettes and columns to apply them to.
#' @param color_by_columns Columns to apply color to (default is NULL).
#' @param row_name_col The column to use as row names (default is NULL).
#' @param do_col_labels Whether to apply custom styling to column labels (default is FALSE).
#' @param target_everything Whether to apply color to all columns (default is FALSE).
#' @param doBodyShadows Whether to apply shadow effects to table body cells (default is FALSE).
#'
#' @return A themed `gt` table object.
#' 
#' @example 
#' \dontrun{
#'   data <- data.frame(
#'     Name = c("Alice", "Bob", "Charlie"),
#'     Score = c(85, 92, 78)
#'   )
#'   pal_df <- data.frame(
#'     cols = list("Score"),
#'     pals = list(eval_palette("ggsci::springfield_simpsons", n = 3, pal_type = "d"))
#'   )
#'   footnotes_df <- data.frame(
#'     notes = list("High score"),
#'     locations = list("Score")
#'   )
#'   themed_table <- r_table_theming(
#'     r_df = data,
#'     title = "Student Scores",
#'     subtitle = "Fall 2023",
#'     footnotes_df = footnotes_df,
#'     source_note = "Source: School Records",
#'     pal_df = pal_df,
#'     do_col_labels = TRUE
#'   )
#'   themed_table
#'  }
#'  
# r_table_theming ----
# Main function to create and theme a table using the `gt` package.
#' @export
r_table_theming <- function(r_df,
                            title,
                            subtitle,
                            footnotes_df,
                            source_note,
                            pal_df,
                            color_by_columns = NULL,
                            row_name_col = NULL,
                            do_col_labels = FALSE,
                            target_everything = FALSE,
                            doBodyShadows = FALSE,
                            footnotes_multiline = TRUE,
                            table_font_size = pct(100),
                            multiline_feet = TRUE
                            ) {
     # Initialize the gt table
     if(is.null(row_name_col)) {
          # If no row name column is specified, create a basic gt table
          r_table <- gt(r_df)
     } else {
          # If a row name column is specified, use it as the row names in the table
          r_table <- gt(r_df, rowname_col = row_name_col)
     }
     
     # Apply color coding to specific columns or the entire table
     if (nrow(r_df) > 1 && target_everything == FALSE) {
          # Apply color palettes to specific columns defined in pal_df
          r_table <- seq_len(nrow(pal_df)) |>
               reduce(\(acc, i) {
                    data_color(acc,
                               columns = pal_df$cols[[i]],  # Apply color to specified columns
                               palette = pal_df$pals[[i]]   # Use the specified palette
                    )
               }, .init = r_table)  # Start with the initial table and accumulate changes
     }
     else if (nrow(r_df) > 1 && target_everything == TRUE) {
          # Apply color palettes to all columns
          r_table <- seq_len(nrow(pal_df)) |>
               reduce(\(acc, i) {
                    data_color(
                         acc,
                         columns = color_by_columns,  # Apply color to specified columns
                         palette = pal_df$pals[[i]],  # Use the specified palette
                         target_columns = everything()  # Apply color to all columns
                    )
               }, .init = r_table)  # Start with the initial table and accumulate changes
     }
     
     # Add footnotes to the table
     r_table <- seq_len(nrow(footnotes_df)) |>
          reduce(\(acc, i) {
               tab_footnote(
                    acc,
                    footnote = footnotes_df$notes[[i]],  # Add the footnote text
                    location = cells_column_labels(
                         columns = footnotes_df$locations[[i]]),  # Specify the column for the footnote
                    placement = "auto"  # Automatically place the footnote
               )
          }, .init = r_table)  # Start with the initial table and accumulate changes
     
     # Apply custom styling to column labels (if enabled)
     if (ncol(r_df) > 1 && do_col_labels == TRUE) {
          cell_col_fills = pal_df$pals[[1]]  # Get the first palette for column labels
          # Apply background colors to column labels
          r_table <- seq_len(nrow(pal_df)) |>
               reduce(\(acc, i) {
                    tab_style(
                         acc,
                         style = cell_fill(color = cell_col_fills[i]),  # Fill column labels with color
                         locations = cells_column_labels(
                              columns = pal_df$cols[[i]])  # Apply to specified columns
                    )
               }, .init = r_table)  # Start with the initial table and accumulate changes
     }
     
     # Add a title and subtitle to the table
     r_table <- r_table |>
          tab_header(title = title, subtitle = subtitle)
     
     # Add a source note at the bottom of the table
     r_table <- r_table |>
          tab_source_note(source_note = source_note)
     
     # Apply general table styling options
     r_table <- r_table |>
          tab_options(
               column_labels.padding = px(10),  # Add padding to column labels
               column_labels.font.weight = "bold",  # Make column labels bold
               column_labels.background.color = '#333',  # Set background color for column labels
               column_labels.border.top.width = px(0),  # Remove top border for column labels
               column_labels.border.bottom.color = 'black',  # Set bottom border color for column labels
               column_labels.vlines.width = px(1),  # Set vertical line width for column labels
               column_labels.border.lr.width = px(1),  # Set left/right border width for column labels
               column_labels.border.bottom.width = px(0),  # Remove bottom border for column labels
               column_labels.border.lr.color = 'black',  # Set left/right border color for column labels
               column_labels.vlines.color = 'black',  # Set vertical line color for column labels
               footnotes.padding = px(5),  # Add padding to footnotes
               footnotes.background.color = '#222',  # Set background color for footnotes
               footnotes.sep = ", ",  # Set separator for footnotes
               footnotes.multiline = footnotes_multiline,  # Allow multiline footnotes (if enabled)
               heading.padding = px(10),  # Add padding to the heading
               heading.background.color = '#222',  # Set background color for the heading
               heading.title.font.size = pct(125),  # Set font size for the title
               heading.subtitle.font.size = pct(110),  # Set font size for the subtitle
               heading.border.bottom.width = px(0),  # Remove bottom border for the heading
               row.striping.include_table_body = TRUE,  # Enable row striping for the table body
               row.striping.include_stub = TRUE,  # Enable row striping for the stub
               row.striping.background_color = '#333',  # Set background color for striped rows
               row_group.as_column = TRUE,  # Display row groups as columns
               source_notes.background.color = '#222',  # Set background color for source notes
               stub.border.width = px(0),  # Remove border for the stub
               stub.font.weight = "bolder",  # Make stub text bolder
               table.margin.left = px(1),  # Set left margin for the table
               table.margin.right = px(1),  # Set right margin for the table
               table.align = "center",  # Center-align the table
               table.border.top.width = px(0),  # Remove top border for the table
               table.border.bottom.width = px(0),  # Remove bottom border for the table
               table.background.color = '#222',  # Set background color for the table
               table.font.size = table_font_size,  # Set font size for the table
               table.layout = "auto",  # Use automatic table layout
               table_body.hlines.color = 'black',  # Set horizontal line color for the table body
               table_body.hlines.width = px(0),  # Remove horizontal lines in the table body
               table_body.vlines.width = px(0),  # Remove vertical lines in the table body
               table_body.border.bottom.color = 'black',  # Set bottom border color for the table body
               table_body.border.top.color = 'black',  # Set top border color for the table body
               table_body.border.bottom.width = px(0),  # Remove bottom border for the table body
               table_body.border.top.width = px(0),  # Remove top border for the table body
          )
     
     return(r_table)
}

Plots

plot theming
#  Plot output script ----
# normal axes ----
ggplot_theming <- function(...) {
     base_theme <- theme_minimal() +
          theme(
               axis.title = element_text(
                    color = 'gray100',
                    margin = margin(5, 5, 5, 5, "pt")
               ),
               axis.title.x = element_text(margin = margin(10, 10, 10, 10, "pt"), face = "bold"),
               axis.title.y = element_text(
                    face = "bold",
                    size = rel(1),
                    margin = margin(5, 5, 5, 5, "pt")
               ),
               axis.text = element_text(color = 'gray', margin = margin(5, 5, 5, 5, "pt")),
               axis.text.x = element_text(),
               axis.text.y = element_text(margin = margin(0, 5, 0, 5, "pt")),
               axis.text.x.top = element_text(vjust = 0.5),
               line = element_line(color = '#222'),
               legend.background = element_rect(fill = '#222'),
               legend.position = "bottom",
               legend.text = element_text(color = 'gray', size = rel(0.7)),
               legend.title = element_text(color = 'white', size = rel(1.0)),
               panel.background = element_rect(fill = '#222',
                                               linewidth = 0),
               panel.grid.major.x = element_line(linetype = 'solid', color = 'black'),
               panel.grid.minor.x = element_line(linetype = "dotted", color = 'black'),
               panel.grid.major.y = element_line(
                    linetype = 'solid',
                    color = 'black',
                    linewidth = .2
               ),
               panel.grid.minor.y = element_line(linetype = 'dotted', color = 'black'),
               plot.title = element_text(
                    face = "bold",
                    color = 'white',
                    size = rel(1.5)
               ),
               plot.background = element_rect(fill = '#222',
                                              linewidth = 0),
               plot.caption = element_text(
                    size = 10,
                    color = "gray80",
                    margin = margin(5, 2, 5, 2),
                    hjust = 0
               ),
               plot.margin = margin(10, 10, 10, 10, "pt"),
               strip.background = element_rect(fill = 'gray20'),
               strip.text = element_text(size = rel(0.8), 
                                         margin = margin(0, 0, 0, 0, "pt"),
                                         color = 'cornsilk'),
               #strip.text.y = element_text(color = "black"),
              # strip.text.x = element_text(color = "ivory", face = "plain"),
               text = element_text(size = 12)
          )
     
     base_theme + theme(...)
}

# flipped axes ----
ggplot_theming_flipped_axes <- function(...) {
     base_theme <- theme_minimal() +
          theme(
               axis.title = element_text(color = 'gray100'),
               axis.text = element_text(color = 'gray'),
               panel.background = element_rect(fill = '#222'),
               panel.grid.major.x = element_line(linetype = 'dashed'),
               panel.grid.minor.x = element_line(linetype = "dotted"),
               panel.grid.major.y = element_line(linetype = 'solid'),
               panel.grid.minor.y = element_line(linetype = 'dotted'),
               plot.title = element_text(color = 'white', size = rel(2)),
               plot.background = element_rect(fill = '#222'),
               legend.background = element_rect(fill = '#222'),
               legend.text = element_text(color = 'gray'),
               legend.title = element_text(color = 'white')
          )
     
     base_theme + theme(...)
     
}
plot building
# Load necessary libraries
library(DBI)          # For database connectivity
library(ggplot2)      # For creating plots
library(scales)       # For scaling and formatting axes
library(openair)      # For specialized plots like wind roses
source("./scripts/Output/Plots/plot_themer.R")  # Custom theme for ggplot

# Helper function to execute a query and return the result
execute_query <- function(con, query) {
     dbGetQuery(con, query)  # Execute the SQL query and return the result
}


plot_temperature_trend <- function(con, freezing_threshold = 32) {
     # Query to fetch temperature data for the day
     query <- "
    SELECT
      temperature_2m,
      time_only,
      common_date,
      month_day
    FROM
      forecast_data
    WHERE
      latitude = 38.748;
  "
     
     data <- execute_query(con, query)  # Execute the query and get the data
     
     # Calculate bar width based on time intervals
     if (nrow(data) > 1) {
          time_diff <- as.numeric(difftime(data$common_date[2], data$common_date[1], units = "secs"))
     } else {
          time_diff <- 3600  # Default to 1 hour if only one data point
     }
     half_width <- time_diff / 2
     
     # Prepare data for rectangular columns
     data <- data %>%
          arrange(common_date) %>%
          mutate(
               xmin = common_date - half_width,
               xmax = common_date + half_width,
               fill_group = ifelse(temperature_2m > freezing_threshold, "above freezing", "below freezing"),
               ymin = ifelse(temperature_2m > freezing_threshold, freezing_threshold, temperature_2m),
               ymax = ifelse(temperature_2m > freezing_threshold, temperature_2m, freezing_threshold)
          )
     
     # Create a ggplot object for temperature trend
     rPlot <- ggplot(data, aes(x = common_date, y = temperature_2m)) +
          geom_rect(
               aes(
                    xmin = xmin,
                    xmax = xmax,
                    ymin = ymin,
                    ymax = ymax,
                    fill = fill_group
               ),
               color = 'black',
               alpha = 0.5
          ) +  # Column rectangles
     #     geom_line(color = "black", size = 0.5) +  # Line plot for temperature
          geom_hline(
               yintercept = freezing_threshold,
               linetype = "dashed",
               color = "lightblue",
               linewidth = 0.4
          ) +  # Horizontal line for freezing threshold
          labs(
               title = "Temperature Forecast",
               x = "",
               y = "° F"
          ) +  # Labels for the plot
          scale_x_datetime(
               labels = label_date("%l %p"),
               breaks = "6 hours",
               minor_breaks = "2 hours",
               guide = guide_axis(n.dodge = 1)
          ) +  # Format x-axis for time
          scale_y_continuous(sec.axis = dup_axis(name = "")) +  # Secondary y-axis
          scale_fill_manual(
               name = "Freezing Indicators",
               values = c(
                    "above freezing" = "green",
                    "below freezing" = "lightblue"
               )
          ) +  # Manual color scale
          facet_grid(~ month_day) +  # Facet by month_day
          ggplot_theming()  # Apply custom theme
     
     # Save the plot as a PNG file
     base_path <- "data/plots/"
     plot_path <- paste0(base_path, "ggTemperature.png")
     ggsave(plot_path, plot = rPlot, scale = 1.5)
     
     # Read the PNG file and display it
     img <- readPNG(plot_path)
     grid::grid.raster(img)
}

# Precipitation and Probability ----
plot_precipitation <- function(con) {
     # Query to fetch precipitation data
     query <- "
    SELECT
      precipitation_probability,
      precipitation,
      rain,
      snowfall,
      time_only,
      common_date,
      month_day
    FROM
      forecast_data
    WHERE
      latitude = 38.748;
  "
     
     data <- execute_query(con, query)  # Execute the query and get the data
     
     # Calculate scale factor for secondary y-axis
     scale_factor <- max(data$precipitation_probability, 
                         na.rm = TRUE) / max(data$rain, 
                                             data$snowfall, na.rm = TRUE)
     
     # Create a ggplot object for precipitation
     rPlot <- ggplot(data, aes(x = as.POSIXct(common_date))) +
          geom_area(
               aes(y = precipitation_probability, fill = "Precipitation Probability"),
               #position = "jitter"
               linewidth = 0.2
          ) +  # Area plot for precipitation probability
          geom_col(
               aes(y = rain * scale_factor, fill = "Rain (in.)"),
               #size = 1,
               alpha = 0.3,
               position = "stack",
               #linetype = "dashed"
          ) +  # Line plot for rain
          geom_col(
               aes(y = snowfall * scale_factor, fill = "Snowfall (in.)"),
               #size = 1,
               alpha = 0.3,
               position = "stack",
               #linetype = "dotted"
          ) +  # Line plot for snowfall
          scale_y_continuous(
               name = "Precipitation Probability (%)",
               sec.axis = sec_axis( ~ . / ifelse(
                    is.infinite(scale_factor), 1000, scale_factor
               ), name = "Rain / Snowfall (inches)")
          ) +  # Dual y-axes
          scale_x_datetime(
               labels = scales::date_format("%H:%M"),
               breaks = "6 hours",
               minor_breaks = "2 hour",
               guide = guide_axis(n.dodge = 1)
          ) +  # Format x-axis for time
          scale_fill_manual(
               name = "Weather Condition",
               values = c(
                    "Rain (in.)" = "skyblue",
                    "Snowfall (in.)" = "snow"
               )
          ) +  # Manual color scale for weather conditions
          scale_fill_manual(
               name = "Precipitation\n and Probability",  # Single legend title
               values = c(
                    "Rain (in.)" = "skyblue", 
                    "Snowfall (in.)" = "snow", 
                    "Precipitation Probability" = "gray20"
               )) +
               labs(title = "Precipitation Forecast", 
               x = "Time of Day", 
               y = "Precipitation Probability (%)") +  # Labels for the plot
          facet_grid(~ month_day) +  # Facet by month_day
          ggplot_theming(legend.position = "bottom", 
                         legend.text = element_text(size = rel(0.5)),
                         legend.title = element_text(size = rel(0.7)))  # Apply custom theme
     
     # Save the plot as a PNG file
     base_path <- "data/plots/"
     plot_path <- paste0(base_path, "ggPrecipitation.png")
     ggsave(plot_path, plot = rPlot, scale = 1.5)
     
     # Read the PNG file and display it
     img <- readPNG(plot_path)
     grid::grid.raster(img)
     
}

# OpenAir Wind Rose ----
plot_wind_rose <- function(con) {
     # Query to fetch wind data
     query <- "
    SELECT
      wind_speed_10m,
      wind_direction_10m,
      time_only,
      common_date,
      month_day
    FROM
      forecast_data
    WHERE
      latitude = 38.748;
  "
     
     data <- execute_query(con, query)  # Execute the query and get the data
     
     # Create a wind rose plot using the openair package
     windRose(
          data,
          ws = "wind_speed_10m",
          wd = "wind_direction_10m",
          breaks = 5,
          paddle = TRUE,
          cols = paletteer_d("ggsci::springfield_simpsons", n = 3),
          key.position = "left"
     )
}

# ggplot wind rose ----
plot_wind_rose_ggplot <- function(con) {
     # Query to fetch wind data
     query <- "
       SELECT
         wind_direction_10m,
         speed_bin,
         wind_direction_cardinal,
         direction_angle,
         time_only,
         month_day
       FROM forecast_data
       WHERE
         latitude = 38.748;
     "
     
     data <- execute_query(con, query)  # Execute the query and get the data
     
     # Summarize data for plotting
     plot_data <- data |>
          group_by(wind_direction_10m, speed_bin, month_day, time_only) |>
          summarise(count = n(), .groups = "drop")
     
     # Get unique days
     days <- unique(plot_data$month_day)
     
     walk(days, ~ {
          # Filter data for the current day
          day_data <- filter(plot_data, month_day == .x)
          
          # Create the wind rose plot for the current day
          day_plot <- ggplot(day_data,
                             aes(
                                  x = wind_direction_10m, y = count, fill = speed_bin
                             )) +
               geom_col(width = 15,
                        color = "black",
                        linewidth = 0.1) +
               coord_polar(start = 2 * pi) +
               scale_x_continuous(
                    limits = c(0, 360),
                    breaks = seq(22.5, 360, by = 22.5),
                    labels = c(' ', 'NE', ' ', 'E', ' ', 'SE', ' ', 'S', ' ','SW', ' ', 'W', ' ', 'NW', ' ', 'N')  # Cardinal labels
               ) +
               scale_fill_paletteer_d('ggprism::viridis') +
               labs(
                    title = paste("Wind Rose -", .x),
                    x = "Wind Direction (°)",
                    y = "",
                    fill = "Wind Speed (m/s)"
               ) +
               facet_wrap( ~ time_only) +  # Facet by hour
               ggplot_theming(
                    text = element_text(size = 8),
                    axis.text = element_text(
                         color = 'gray',
                         margin = margin(5, 5, 5, 5, "pt"),
                         size = rel(.8)
                    ),
                    axis.text.y = element_blank(),
                    strip.background = element_rect(fill = 'gray20'),
                    #strip.background.y = element_rect('#39D94E'),
                    strip.text = element_text(size = rel(0.8), 
                                              margin = margin(0, 0, 0, 0, "pt"),
                                              color = 'cornsilk'),
                    
               )
          
          # Save the plot for the current day
          ggsave(
               stringr::str_remove(paste0("data/plots/wind_rose_", .x, ".png"), " "),
               day_plot,
               #width = 24,
               #height = 20,
               scale = 2
          )
     })
     
     }


# Visibility geom_line ----
plot_visibility_line <- function(con) {
     # Query to fetch visibility data
     query <- "
    SELECT
      visibility,
      common_date,
      month_day
    FROM
      forecast_data
    WHERE
      latitude = 38.748;
  "
     
     data <- execute_query(con, query)  # Execute the query and get the data
     
     # Create a ggplot object for visibility trend
     rPlot <- ggplot(data, aes(x = common_date, y = visibility / 10 ^ 3)) +
          geom_line(color = "white", size = 0.5) +  # Line plot for visibility
          geom_point(color = "gray", alpha = 1) +  # Points for visibility
          labs(title = "Visibility Map", x = "Date", y = "Visibility (km)") +  # Labels for the plot
          scale_x_datetime(
               labels = scales::date_format("%H:%M"),
               breaks = "6 hours",
               minor_breaks = "2 hour",
               guide = guide_axis(n.dodge = 1)
          ) +  # Format x-axis for time
          facet_grid(~ month_day) +  # Facet by month_day
          ggplot_theming()  # Apply custom theme
     
     # Save the plot as a PNG file
     base_path <- "data/plots/"
     plot_path <- paste0(base_path, "ggVisibilityLine.png")
     ggsave(plot_path, plot = rPlot, scale = 1.5)
     
     # Read the PNG file and display it
     img <- readPNG(plot_path)
     grid::grid.raster(img)
     
}

# Visibility Non-Categorical Heat ----
plot_visibility_heat <- function(con) {
     # Query to fetch visibility data
     query <- "
    SELECT
      visibility,
      common_date,
      time_only,
      month_day
    FROM
      forecast_data
    WHERE
      latitude = 38.748;
  "
     
     data <- execute_query(con, query)  # Execute the query and get the data
     data$time_only <- as.POSIXct(data$time_only, format = "%H:%M:%S")
     
     # Create a ggplot object for visibility heatmap
     rPlot <- ggplot(data, aes(
          x = month_day,
          y = time_only,
          fill = visibility / 10 ^ 3
     )) +
          geom_tile() +  # Tile plot for visibility
          scale_fill_viridis_c(option = "magma") +  # Color scale for visibility
          labs(
               title = "Visibility (km)",
               x = "Time of Day",
               y = "Date",
               fill = "Visibility (km)"
          ) +  # Labels for the plot
          scale_y_datetime(
               date_labels = "%H:%M",
               date_breaks = "2 hours",
               sec.axis = dup_axis(name = "")
          ) +  # Format x-axis for time
          facet_grid(~ month_day, scales = "free") +
          ggplot_theming(legend.position = "right")  # Apply custom theme
     
     # Save the plot as a PNG file
     base_path <- "data/plots/"
     plot_path <- paste0(base_path, "ggVisibilityHeat.png")
     ggsave(plot_path, plot = rPlot, scale = 1.5)
     
     # Read the PNG file and display it
     img <- readPNG(plot_path)
     grid::grid.raster(img)
}

# Visibility Categorical Heat ----
plot_visibility_categorical_heat <- function(con) {
     # Query to fetch visibility data
     query <- "
    SELECT
      visibility,
      visibility_category,
      common_date,
      time_only,
      month_day
    FROM
      forecast_data
    WHERE
      latitude = 38.748;
  "
     
     data <- execute_query(con, query)  # Execute the query and get the data

          # Create a ggplot object for categorical visibility heatmap
     # Convert time_only to POSIXct for plotting
     data$time_only <- as.POSIXct(data$time_only, format = "%H:%M:%S")
     
     # Create a ggplot object for weather codes
     rPlot <- ggplot(data, aes(x = month_day, y = time_only, fill = visibility_category)) +
          geom_tile() +  # Tile plot for visibility categories
          scale_fill_manual(
               values = c(
                    "Clearest (>30 km)" = "green",
                    "Excellent (10-30 km)" = "darkgreen",
                    "Good (5-10 km)" = "yellow",
                    "Moderate (2-5 km)" = "orange",
                    "Low (1-2 km)" = "red",
                    "Fog/Haze (<1 km)" = "purple"
               )
          ) +  # Manual color scale for visibility categories
          labs(
               title = "Visibility Category Map",
               x = "Date",
               y = "Time of Day",
               fill = "Visibility Level"
          ) +  # Labels for the plot
          scale_y_datetime(
               date_labels = "%H:%M",
               date_breaks = "2 hours",
               sec.axis = dup_axis(name = "")
          ) +  # Format y-axis for time
          facet_grid(~ month_day, scales = "free") +
          ggplot_theming(legend.position = "right")  # Apply custom theme
     
     # Save the plot as a PNG file
     base_path <- "data/plots/"
     plot_path <- paste0(base_path, "ggVisibilityCat.png")
     ggsave(plot_path, plot = rPlot, scale = 1.5)
     
     # Read the PNG file and display it
     img <- readPNG(plot_path)
     grid::grid.raster(img)
}

# Weather Codes ----
plot_weather_codes <- function(con) {
     # Query to fetch weather codes and descriptions
     query <- "
    SELECT
      fd.weather_code,
      wc.Description AS description,
      fd.time_only,
      fd.month_day
    FROM
      forecast_data fd
    LEFT JOIN weather_codes wc ON wc.weather_code == fd.weather_code
    WHERE
      latitude = 38.748;
  "
     
     data <- execute_query(con, query)  # Execute the query and get the data
     
     # Convert time_only to POSIXct for plotting
     data$time_only <- as.POSIXct(data$time_only, format = "%H:%M:%S")
     
     # Create a ggplot object for weather codes
     rPlot <- ggplot(
          data, aes(x = month_day, y = time_only, fill = description)) +
          geom_tile(alpha = 0.5) +  # Tile plot for weather codes
          scale_fill_paletteer_d("khroma::land") +  # Color scale for weather codes
          scale_y_datetime(
               date_labels = "%H:%M",
               date_breaks = "2 hours",
               sec.axis = dup_axis(name = "")
          ) +  # Format y-axis for time
          labs(
               title = "Weather Code Map",
               x = "Day",
               y = "Time of Day",
               fill = "Weather Code"
          ) +  # Labels for the plot
          facet_grid(~ month_day, scales = "free") +
          ggplot_theming(legend.position = "right")  # Apply custom theme
     
     # Save the plot as a PNG file
     base_path <- "data/plots/"
     plot_path <- paste0(base_path, "ggWeatherCodes.png")
     ggsave(plot_path, plot = rPlot, scale = 1.5)
     
     # Read the PNG file and display it
     img <- readPNG(plot_path)
     grid::grid.raster(img)
}

display_a_plot <- function(plot_path) {
     # Read the PNG file and display it
     img <- readPNG(plot_path)
     grid::grid.raster(img)     
}

Data Ingestion Workflow

Data moves from:

Python ingestion → R loading → SQL transformations → final table materialization

workflow

workflow

Import

Weather Data API

(“🌤️ Free Open-Source Weather API Open-Meteo.com” n.d.)

“🌤️ Free Open-Source Weather API Open-Meteo.com.” n.d. Accessed February 8, 2025. https://open-meteo.com/.

Forecast

Run the API script to import the dataset.
import pandas as pd  # For generating the date range
import requests_cache  # For caching API requests to reduce load and improve performance
from retry_requests import retry  # For retrying failed API requests
import openmeteo_requests  # For interacting with the Open-Meteo API
from datetime import datetime, timezone  # For handling date and time

def import_api_hourly(latitude: float, longitude: float) -> pd.DataFrame:
     """
     Fetches hourly weather data from the Open-Meteo API for the given latitude and longitude.
     
     Parameters:
        latitude (float): The latitude of the location for which weather data is requested.
        longitude (float): The longitude of the location for which weather data is requested.
     
     Returns:
        pd.DataFrame: A Pandas DataFrame containing hourly weather data for the specified location.
     """
     
     # Setup the Open-Meteo API client with cache and retry on error
     # Caching reduces the number of API calls by storing responses for 1 hour (3600 seconds)
     cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
     
     # Retry mechanism: retry up to 5 times with exponential backoff if the request fails
     retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
     
     # Initialize the Open-Meteo API client with the cached and retry-enabled session
     openmeteo = openmeteo_requests.Client(session = retry_session)
     
     # Define the API endpoint and parameters for the weather data request
     url = "https://api.open-meteo.com/v1/forecast"
     params = {
        "latitude": latitude,  # Latitude of the location
        "longitude": longitude,  # Longitude of the location
        "hourly": [  # List of hourly weather variables to fetch
            "temperature_2m",  # Temperature at 2 meters above ground
            "precipitation_probability",  # Probability of precipitation
            "precipitation",  # Total precipitation
            "rain",  # Rain amount
            "showers",  # Showers amount
            "snowfall",  # Snowfall amount
            "snow_depth",  # Snow depth
            "weather_code",  # Weather condition code
            "visibility",  # Visibility
            "wind_speed_10m",  # Wind speed at 10 meters above ground
            "wind_direction_10m"  # Wind direction at 10 meters above ground
        ],
        "temperature_unit": "fahrenheit",  # Temperature unit (Fahrenheit)
        "wind_speed_unit": "mph",  # Wind speed unit (miles per hour)
        "precipitation_unit": "inch",  # Precipitation unit (inches)
        "timezone": "America/Chicago",  # Timezone for the data
        #"forecast_days": 1,  # Number of forecast days (1 day)
        "past_hours": 6,  # Include past 6 hours of data
        "forecast_hours": 24,  # Include next 24 hours of forecast
        "models": "best_match"  # Use the best matching weather model
     }
     
     # Make the API request to fetch weather data
     responses = openmeteo.weather_api(url, params = params)
     
     # Process the first location in the response (only one location is requested)
     response = responses[0]
     
     # Print location and timezone information for debugging
     print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
     print(f"Elevation {response.Elevation()} m asl")
     print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
     print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")
     
     # Process hourly data. The order of variables needs to be the same as requested.
     hourly = response.Hourly()
     hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
     hourly_precipitation_probability = hourly.Variables(1).ValuesAsNumpy()
     hourly_precipitation = hourly.Variables(2).ValuesAsNumpy()
     hourly_rain = hourly.Variables(3).ValuesAsNumpy()
     hourly_showers = hourly.Variables(4).ValuesAsNumpy()
     hourly_snowfall = hourly.Variables(5).ValuesAsNumpy()
     hourly_snow_depth = hourly.Variables(6).ValuesAsNumpy()
     hourly_weather_code = hourly.Variables(7).ValuesAsNumpy()
     hourly_visibility = hourly.Variables(8).ValuesAsNumpy()
     hourly_wind_speed_10m = hourly.Variables(9).ValuesAsNumpy()
     hourly_wind_direction_10m = hourly.Variables(10).ValuesAsNumpy()
     
     hourly_data = {"date": pd.date_range(
        start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
        end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
        freq = pd.Timedelta(seconds = hourly.Interval()),
        inclusive = "left"
     )}
     
     hourly_data["latitude"] = latitude
     hourly_data["longitude"] = longitude
     hourly_data["temperature_2m"] = hourly_temperature_2m
     hourly_data["precipitation_probability"] = hourly_precipitation_probability
     hourly_data["precipitation"] = hourly_precipitation
     hourly_data["rain"] = hourly_rain
     hourly_data["showers"] = hourly_showers
     hourly_data["snowfall"] = hourly_snowfall
     hourly_data["snow_depth"] = hourly_snow_depth
     hourly_data["weather_code"] = hourly_weather_code
     hourly_data["visibility"] = hourly_visibility
     hourly_data["wind_speed_10m"] = hourly_wind_speed_10m
     hourly_data["wind_direction_10m"] = hourly_wind_direction_10m
     
     #data = pd.DataFrame(data = hourly_data)
     
     return(pd.DataFrame(data = hourly_data))
Write hourly api results.
coordinates <- list(
  c(38.748, -90.439),  # Original coordinates
  c(40.7128, -74.0060),  # New York
  c(34.0522, -118.2437)  # Los Angeles
)

lats <- purrr::map_dbl(coordinates, 1)
lons <- purrr::map_dbl(coordinates, 2)

purrr::walk2(lats, lons, \(lat, lon) {
  dbWriteTable(
    duckdb_con,
    "forecast_data",
    py$import_api_hourly(lat, lon),
    append = TRUE
  )
}, .progress = FALSE)
Coordinates 38.74498748779297°N -90.4433364868164°E
Elevation 175.0 m asl
Timezone b'America/Chicago' b'GMT-5'
Timezone difference to GMT+0 -18000 s
Coordinates 40.71033477783203°N -73.99308776855469°E
Elevation 32.0 m asl
Timezone b'America/Chicago' b'GMT-5'
Timezone difference to GMT+0 -18000 s
Coordinates 34.06025695800781°N -118.23432922363281°E
Elevation 91.0 m asl
Timezone b'America/Chicago' b'GMT-5'
Timezone difference to GMT+0 -18000 s

Historical

#| label: loadHourlyHistoricalAPIscript
#| code-summary: "Run the API script to import the dataset."
#| file: "scripts/Import/API/Hourly/import_api_hourly_historical.py"
#| label: writeHistoricalAPIdata
#| code-summary: "Write hourly api historical data."

save_to_partition <- function(df, lat, lon) {
  # Add partitioning columns to the data frame
  df <- df|>
    mutate(
      lat = lat,
      lon = lon,
      year = year(date),
      month = month(date)
    )
  
  # Write to Hive partitions (folders auto-created)
  arrow::write_dataset(
    df,
    path = "data/historical_weather/",
    format = "parquet",
    partitioning = c("lat", "lon", "year", "month"),
    existing_data_behavior = "overwrite"  # or "delete_matching"
  )
}

coordinates1 <- list(
     c(34.0522, -118.2437),   # Los Angeles, CA (Start)
     c(33.9806, -117.3755),   # Riverside, CA (I-215 logistics)
     c(34.1495, -117.2345),   # San Bernardino, CA (I-10/I-215 interchange)
     c(33.6103, -114.5964),   # Blythe, CA (I-10 desert truck stop)
     c(33.4484, -112.0740)   # Phoenix, AZ (I-10)
)

coordinates2 <- list(
     c(35.1983, -111.6513),   # Flagstaff, AZ (I-40 mountain gateway)
     c(35.0844, -106.6504),   # Albuquerque, NM (I-40)
     c(34.9333, -104.6876),   # Santa Rosa, NM (I-40 rest area)
     c(35.2210, -101.8313),   # Amarillo, TX (I-40, "Big Texan" truck stop)
     c(35.2161, -100.2491)   # Shamrock, TX (I-40, near OK border)
)

coordinates3 <- list(
     c(35.4676, -97.5164),    # Oklahoma City, OK (I-40/I-44 junction)
     c(36.7538, -95.2206),    # Miami, OK (I-44, near MO border)
     c(37.0842, -94.5133),    # Joplin, MO (I-44 truck hub)
     c(38.7480, -90.4390),    # St. Louis, MO (I-44/I-70 interchange)
     c(39.1200, -88.5435)    # Effingham, IL (I-70 logistics hub)
)

coordinates4 <- list(
     c(39.7684, -86.1581),    # Indianapolis, IN (I-70 "Crossroads of America")
     c(39.7589, -84.1916),    # Dayton, OH (I-70/I-75 junction)
     c(40.4406, -79.9959),    # Pittsburgh, PA (I-76)
     c(39.9995, -78.2341),    # Breezewood, PA (I-70/I-76 truck stop)
     c(40.7357, -74.1724)     # Newark, NJ (End, NYC metro)
)

lats <- purrr::map_dbl(coordinates4, 1)
lons <- purrr::map_dbl(coordinates4, 2)

purrr::walk2(lats, lons, \(lat, lon) {

     df <- py$import_api_hourly_historical(lat, lon, "1974-01-01", "2024-12-31")
     
     save_to_partition(df, lat, lon)

     # Delay to avoid API rate limits
     Sys.sleep(300)

}, .progress = FALSE)

storage://

  • data/

    • weather_data/

      • lat=34.0522/

        • lon=-118.2437/

          • year=2024/

            • month=01/

              • part-0.parquet
Code
SELECT * --lat/lon/year/month
FROM read_parquet(
     'data/historical_weather/*/*/*/*/part-0.parquet', 
     hive_partitioning = true)
LIMIT 100;
Create a table from the hive partitioned dataset.
CREATE OR REPLACE TABLE historical_data AS 
SELECT * 
FROM read_parquet(
     'data/historical_weather/*/*/*/*/part-0.parquet', --lat/lon/year/month
     hive_partitioning = true);

About the Weather Data

The study, published in the Weather and Forecasting journal, focuses on evaluating and improving the accuracy of weather prediction models, particularly for severe weather events. It examines the performance of high-resolution numerical weather prediction (NWP) models in forecasting convective storms, which are critical for predicting severe weather such as thunderstorms, hail, and tornadoes. The research highlights advancements in model resolution, data assimilation techniques, and the integration of observational data to enhance forecast precision. The findings emphasize the importance of these improvements for short-term (nowcasting) and medium-range forecasts, particularly in regions prone to severe weather, like the central United States (including Missouri). Dowell et al. (2022)

Dowell, David C., Curtis R. Alexander, Eric P. James, Stephen S. Weygandt, Stanley G. Benjamin, Geoffrey S. Manikin, Benjamin T. Blake, et al. 2022. “The High-Resolution Rapid Refresh (HRRR): An Hourly Updating Convection-Allowing Forecast Model. Part I: Motivation and System Description.” Weather and Forecasting 37 (8): 1371–95. https://doi.org/10.1175/WAF-D-21-0151.1.
table setup
# Create the tibble
forecast_models <- tibble(
     Model = c("GFS", "HRRR"),
     Developed_By = c(
          "NOAA (National Oceanic and Atmospheric Administration)",
          "NOAA (specifically by the Earth System Research Laboratory)"
     ),
     Scope = c(
          "Global",
          "Regional (primarily focused on the contiguous United States)"
     ),
     Resolution = c(
          "Lower resolution compared to HRRR (approximately 13 km as of recent updates)",
          "High resolution (3 km)"
     ),
     Forecast_Range = c("Up to 16 days", "Up to 18 hours"),
     Updates = c("Runs four times a day (00Z, 06Z, 12Z, 18Z)", "Runs every hour"),
     Applications = c(
          "Used for long-term weather forecasting, climate modeling, and global weather patterns.",
          "Ideal for short-term, detailed weather forecasting, including severe weather events like thunderstorms, tornadoes, and localized precipitation."
     )
)

locations_list = colnames(forecast_models)

notes_list =  list(
     "",
  "Organization or entity responsible for developing the model.",
  "Geographical coverage of the model (e.g., global or regional).",
  "Spatial resolution of the model, indicating the level of detail in the forecasts.",
  "Time period for which the model provides forecasts.",
  "Frequency at which the model is updated with new data.",
  "Primary uses and strengths of the model in weather forecasting."
  )

footnotes_df <- tibble(
  notes = notes_list, 
  locations = locations_list)

pal_df <- tibble(
  cols = locations_list
#  pals = list(eval_palette("viridis::viridis", 2, 'c', 1))
)

rTable <- r_table_theming(
forecast_models,
title = "Forecast Models: Attributes",
subtitle = NULL,
footnotes_df,
source_note = md("**source**: "),
pal_df,
multiline_feet = TRUE,
table_font_size = pct(85),
target_everything = TRUE,
row_name_col = "Model",
)
Table 1
Forecast Models: Attributes
Developed_By1 Scope2 Resolution3 Forecast_Range4 Updates5 Applications6
GFS NOAA (National Oceanic and Atmospheric Administration) Global Lower resolution compared to HRRR (approximately 13 km as of recent updates) Up to 16 days Runs four times a day (00Z, 06Z, 12Z, 18Z) Used for long-term weather forecasting, climate modeling, and global weather patterns.
HRRR NOAA (specifically by the Earth System Research Laboratory) Regional (primarily focused on the contiguous United States) High resolution (3 km) Up to 18 hours Runs every hour Ideal for short-term, detailed weather forecasting, including severe weather events like thunderstorms, tornadoes, and localized precipitation.
source:
1 Organization or entity responsible for developing the model.
2 Geographical coverage of the model (e.g., global or regional).
3 Spatial resolution of the model, indicating the level of detail in the forecasts.
4 Time period for which the model provides forecasts.
5 Frequency at which the model is updated with new data.
6 Primary uses and strengths of the model in weather forecasting.
table setup
forecast_model_differences <- tibble(
"Resolution" = c(
"HRRR has a much higher resolution than GFS, making it more accurate for short-term, localized forecasts."
),
"Forecast_Range" = c("GFS provides forecasts for a much longer period compared to HRRR."),
"Update_Frequency" =  c(
"HRRR updates more frequently, which is crucial for capturing rapidly changing weather conditions."
)
)

locations_list = colnames(forecast_model_differences)

notes_list =  list(
  "Spatial resolution of the model, indicating the level of detail in the forecasts.",
  "Time period for which the model provides forecasts.",
  "Frequency at which the model is updated with new data.")

footnotes_df <- tibble(
  notes = notes_list, 
  locations = locations_list)

pal_df <- tibble(
  cols = locations_list
#  pals = list(eval_palette("viridis::viridis", 2, 'c', 1))
)

rTable <- r_table_theming(
forecast_model_differences,
title = "Forecast Models: Differences",
subtitle = NULL,
footnotes_df,
source_note = md("**source**: "),
pal_df,
multiline_feet = TRUE,
table_font_size = pct(85),
target_everything = TRUE,
row_name_col = NULL
)
Table 2
Forecast Models: Differences
Resolution1 Forecast_Range2 Update_Frequency3
HRRR has a much higher resolution than GFS, making it more accurate for short-term, localized forecasts. GFS provides forecasts for a much longer period compared to HRRR. HRRR updates more frequently, which is crucial for capturing rapidly changing weather conditions.
source:
1 Spatial resolution of the model, indicating the level of detail in the forecasts.
2 Time period for which the model provides forecasts.
3 Frequency at which the model is updated with new data.

Database Setup

load enum file
#' Create ENUM Type and Associate Codes with Descriptions
#'
#' This function creates an ENUM type in DuckDB and associates codes with their descriptions.
#' It can be used to create other ENUM types and associations
#'
#' @param duckdb_conn A DuckDB connection object.
#' @param enum_name A string specifying the name of the ENUM type to be created.
#' @param table_name A string specifying the name of the ENUM dictionary table.
#' @param codes A character vector of codes to be included in the ENUM type.
#' @param descriptions A character vector of descriptions corresponding to the codes.
#' @example
#' \dontrun{
#' library(DBI)
#' 
#' codes <- c('0', '1', '2', '3', '45', '48', '51', '53', '55', '56', '57', 
#'            '61', '63', '65', '66', '67', '71', '73', '75', '77', '80', '81', 
#'            '82', '85', '86', '95', '96', '99')
#' descriptions <- c('Clear sky', 'Mainly clear', 'Partly cloudy', 'Overcast', 
#'                   'Fog', 'Depositing rime fog', 'Drizzle: Light', 'Drizzle: Moderate', 
#'                   'Drizzle: Dense', 'Freezing Drizzle: Light', 'Freezing Drizzle: Dense', 
#'                   'Rain: Slight', 'Rain: Moderate', 'Rain: Heavy', 'Freezing Rain: Light', 
#'                   'Freezing Rain: Heavy', 'Snow fall: Slight', 'Snow fall: Moderate', 
#'                   'Snow fall: Heavy', 'Snow grains', 'Rain showers: Slight', 
#'                   'Rain showers: Moderate', 'Rain showers: Violent', 'Snow showers: Slight', 
#'                   'Snow showers: Heavy', 'Thunderstorm: Slight or moderate', 
#'                   'Thunderstorm with slight hail', 'Thunderstorm with heavy hail')
#' 
#' result <- create_enum_and_associate(duckdb_con, "WeatherCode", codes, descriptions)
#' print(result)
#' }
#' @export
create_enum_and_associate <- function(duckdb_con, enum_name, table_name, code_frame) {
     
     # Attempt to drop the ENUM type if it exists
     drop_query <- paste0("DROP TYPE IF EXISTS ", enum_name, ";")
     
     tryCatch({
          dbExecute(duckdb_con, drop_query)
          message(paste("Dropped existing ENUM type:", enum_name))
     }, error = \(e) {
          message(paste0("No existing ENUM type to drop: ", enum_name))
     })
     
     # Create the ENUM type
     enum_query <- paste0(
          "CREATE TYPE ", enum_name, " AS ENUM (",
          paste0(
               "'", code_frame$code, "'", collapse = ", "), ");"
          )
     
     dbExecute(duckdb_con, enum_query)
     message(paste0("Created ENUM type: ", enum_name))
     
     # Write an association table for reference
     dbWriteTable(
          duckdb_con,
          table_name,
          code_frame,
          overwrite = TRUE
     )
}
Sets the custom data types in the database.
code_frame <- tibble::tibble(
weather_code = c(
     '0',
     '1',
     '2',
     '3',
     '45',
     '48',
     '51',
     '53',
     '55',
     '56',
     '57',
     '61',
     '63',
     '65',
     '66',
     '67',
     '71',
     '73',
     '75',
     '77',
     '80',
     '81',
     '82',
     '85',
     '86',
     '95',
     '96',
     '99'
),

description = c(
     'Clear sky',
     'Mainly clear',
     'Partly cloudy',
     'Overcast',
     'Fog',
     'Depositing rime fog',
     'Drizzle: light',
     'Drizzle: moderate',
     'Drizzle: dense',
     'Freezing drizzle: light',
     'Freezing drizzle: dense',
     'Rain: slight',
     'Rain: moderate',
     'Rain: heavy',
     'Freezing rain: light',
     'Freezing rain: heavy',
     'Snow fall: slight',
     'Snow fall: moderate',
     'Snow fall: heavy',
     'Snow grains',
     'Rain showers: slight',
     'Rain showers: moderate',
     'Rain showers: violent',
     'Snow showers: slight',
     'Snow showers: heavy',
     'Thunderstorm: slight or moderate',
     'Thunderstorm with slight hail',
     'Thunderstorm with heavy hail'
),

implication = c(
     "Normal operations - No restrictions",              # Clear sky
     "Normal operations - Increased vigilance",          # Mainly clear
     "Normal operations - Monitor weather updates",      # Partly cloudy
     "Reduced visibility - Maintain safe following distance", # Overcast
     "Speed reduction required - Fog lights mandatory",  # Fog
     "Speed reduction required - Extreme caution",        # Depositing rime fog
     "Potential minor delays - Road surface slickness",   # Drizzle: light
     "Speed restrictions - 15% reduction recommended",    # Drizzle: moderate
     "Mandatory speed reduction - 25%+",                 # Drizzle: dense
     "Chain requirement - Level 1 traction advisory",     # Freezing drizzle: light
     "Road closure likely - Avoid non-essential travel",  # Freezing drizzle: dense
     "Increased stopping distance - 10% speed reduction", # Rain: slight
     "15-20% speed reduction - Check tire tread",         # Rain: moderate
     "25%+ speed reduction - Possible detour routing",    # Rain: heavy
     "Mandatory chains - Temperature monitoring",         # Freezing rain: light
     "Road closure imminent - Immediate stop advised",    # Freezing rain: heavy
     "15% speed reduction - Traction control engaged",    # Snow fall: slight
     "25% speed reduction - Chain requirement possible",  # Snow fall: moderate
     "Road closure likely - Abandon shipment staging",    # Snow fall: heavy
     "Speed restriction - Watch for black ice",           # Snow grains
     "Increased following distance - 4-second rule",      # Rain showers: slight
     "20% speed reduction - Avoid lane changes",          # Rain showers: moderate
     "Immediate parking advised - Flash flood risk",      # Rain showers: violent
     "Chain requirement - Trailer brake check",           # Snow showers: slight
     "Road closure protocol activated",                   # Snow showers: heavy
     "Delay shipments - No open-top trailers",            # Thunderstorm: slight/mod
     "Immediate stop - Seek shelter",                     # Thunderstorm w/ slight hail
     "Catastrophic risk - Emergency protocols"            # Thunderstorm w/ heavy hail
),

risk_score = c(
     0.1,  # Clear sky
     0.15, # Mainly clear
     0.2,  # Partly cloudy
     0.25, # Overcast
     0.4,  # Fog
     0.5,  # Depositing rime fog
     0.3,  # Drizzle: light
     0.35, # Drizzle: moderate
     0.45, # Drizzle: dense
     0.55, # Freezing drizzle: light
     0.8,  # Freezing drizzle: dense
     0.3,  # Rain: slight
     0.4,  # Rain: moderate
     0.6,  # Rain: heavy
     0.65, # Freezing rain: light
     0.85, # Freezing rain: heavy
     0.4,  # Snow fall: slight
     0.6,  # Snow fall: moderate
     0.75, # Snow fall: heavy
     0.5,  # Snow grains
     0.35, # Rain showers: slight
     0.5,  # Rain showers: moderate
     0.7,  # Rain showers: violent
     0.6,  # Snow showers: slight
     0.8,  # Snow showers: heavy
     0.65, # Thunderstorm: slight/mod
     0.85, # Thunderstorm w/ slight hail
     0.95  # Thunderstorm w/ heavy hail
  ),

dot_compliance = c(
     "§392.14(a)",              # Clear sky
     "§392.14(a)",              # Mainly clear
     "§392.14(a)",              # Partly cloudy
     "§392.14(b)",              # Overcast
     "§392.14(b)+§393.75(c)",   # Fog
     "§392.14(c)",              # Depositing rime fog
     "§392.71(a)",              # Drizzle: light
     "§392.71(b)",              # Drizzle: moderate
     "§392.71(c)",              # Drizzle: dense
     "§392.16(a)",              # Freezing drizzle: light
     "§392.16(c)",              # Freezing drizzle: dense
     "§392.71(a)",              # Rain: slight
     "§392.71(b)",              # Rain: moderate
     "§392.71(c)",              # Rain: heavy
     "§392.16(b)+§393.95(d)",   # Freezing rain: light
     "§392.16(c)",              # Freezing rain: heavy
     "§392.14(b)+§393.95(a)",   # Snow fall: slight
     "§392.14(c)+§393.95(b)",   # Snow fall: moderate
     "§392.16(c)",              # Snow fall: heavy
     "§392.14(c)",              # Snow grains
     "§392.14(b)",              # Rain showers: slight
     "§392.14(c)",              # Rain showers: moderate
     "§392.16(c)",              # Rain showers: violent
     "§393.95(c)",              # Snow showers: slight
     "§392.16(c)",              # Snow showers: heavy
     "§392.14(d)+§393.75(e)",   # Thunderstorm: slight/mod
     "§392.16(c)",              # Thunderstorm w/ slight hail
     "§392.16(e)"               # Thunderstorm w/ heavy hail
),

severity = cut(
risk_score,
breaks = c(0, 0.3, 0.5, 0.7, 1),
labels = c("Low", "Moderate", "High", "Critical")
),

insurance_surcharge = c(
     0,    # Clear sky
     0,    # Mainly clear
     0.05, # Partly cloudy (5%)
     0.07, # Overcast (7%)
     0.1,  # Fog (10%)
     0.15, # Rime fog (15%)
     0.08, # Light drizzle (8%)
     0.12, # Moderate drizzle (12%)
     0.18, # Dense drizzle (18%)
     0.25, # Freezing drizzle light (25%)
     0.4,  # Freezing drizzle dense (40%)
     0.1,  # Rain slight (10%)
     0.15, # Rain moderate (15%)
     0.25, # Rain heavy (25%)
     0.35, # Freezing rain light (35%)
     0.5,  # Freezing rain heavy (50%)
     0.2,  # Snow slight (20%)
     0.3,  # Snow moderate (30%)
     0.45, # Snow heavy (45%)
     0.25, # Snow grains (25%)
     0.12, # Rain showers slight (12%)
     0.2,  # Rain showers moderate (20%)
     0.35, # Rain showers violent (35%)
     0.3,  # Snow showers slight (30%)
     0.5,  # Snow showers heavy (50%)
     0.4,  # Thunderstorm (40%)
     0.6,  # Thunderstorm w/ slight hail (60%)
     0.8   # Thunderstorm w/ heavy hail (80%)
),

fuel_multiplier = c(
     1.0,  # Clear sky
     1.0,  # Mainly clear
     1.03, # Partly cloudy (3%)
     1.05, # Overcast (5%)
     1.12, # Fog (12%)
     1.15, # Rime fog (15%)
     1.07, # Light drizzle (7%)
     1.1,  # Moderate drizzle (10%)
     1.15, # Dense drizzle (15%)
     1.25, # Freezing drizzle light (25%)
     1.4,  # Freezing drizzle dense (40%)
     1.08, # Rain slight (8%)
     1.12, # Rain moderate (12%)
     1.2,  # Rain heavy (20%)
     1.3,  # Freezing rain light (30%)
     1.5,  # Freezing rain heavy (50%)
     1.15, # Snow slight (15%)
     1.25, # Snow moderate (25%)
     1.4,  # Snow heavy (40%)
     1.2,  # Snow grains (20%)
     1.1,  # Rain showers slight (10%)
     1.15, # Rain showers moderate (15%)
     1.3,  # Rain showers violent (30%)
     1.25, # Snow showers slight (25%)
     1.45, # Snow showers heavy (45%)
     1.35, # Thunderstorm (35%)
     1.6,  # Thunderstorm w/ slight hail (60%)
     2.0   # Thunderstorm w/ heavy hail (100%)
  ),

route_delay_factor = c(
     1.0,  # Clear sky
     1.0,  # Mainly clear
     1.00,  # Partly cloudy
     1.01,  # Overcast
     1.05,  # Fog
     1.08,  # Rime fog
     1.03,  # Light drizzle
     1.06,  # Moderate drizzle
     1.2,  # Dense drizzle
     1.25,  # Freezing drizzle light
     1.4,  # Freezing drizzle dense
     1.04,  # Rain slight
     1.08,  # Rain moderate
     1.25,  # Rain heavy
     1.3,  # Freezing rain light
     1.5,  # Freezing rain heavy
     1.2,  # Snow slight
     1.3,  # Snow moderate
     1.45,  # Snow heavy
     1.25,  # Snow grains
     1.05,  # Rain showers slight
     1.2,  # Rain showers moderate
     1.35,  # Rain showers violent
     1.3,  # Snow showers slight
     1.5,  # Snow showers heavy
     1.3,  # Thunderstorm
     1.6,  # Thunderstorm w/ slight hail
     2.0  # Thunderstorm w/ heavy hail
),

# New Labor & Equipment Columns
safety_inspections = c(
     "Pre-trip only",                    # Clear sky
     "Pre-trip + mid-trip visual",       # Mainly clear
     "Pre-trip + brake check",           # Partly cloudy
     "Pre-trip + hourly tire checks",    # Overcast
     "Pre-trip + fog light checks",      # Fog
     "Pre-trip + 30-min interval checks",# Rime fog
     "Pre-trip + 2hr brake tests",       # Drizzle: light
     "Pre-trip + 1hr brake tests",       # Drizzle: moderate
     "Pre-trip + 30min brake tests",     # Drizzle: dense
     "Pre-trip + axle temp monitoring",  # Freezing drizzle: light
     "Continuous monitoring required",   # Freezing drizzle: dense
     "Pre-trip + wiper checks",          # Rain: slight
     "Pre-trip + 2hr wiper checks",      # Rain: moderate
     "Pre-trip + 30min wiper checks",    # Rain: heavy
     "Pre-trip + chain integrity checks",# Freezing rain: light
     "Roadside inspections mandatory",   # Freezing rain: heavy
     "Pre-trip + tire chain prep",       # Snow fall: slight
     "Pre-trip + hourly chain checks",   # Snow fall: moderate
     "Continuous chain monitoring",      # Snow fall: heavy
     "Pre-trip + sanding required",      # Snow grains
     "Pre-trip + drainage checks",       # Rain showers: slight
     "Pre-trip + undercarriage checks",  # Rain showers: moderate
     "Abort trip + full inspection",     # Rain showers: violent
     "Pre-trip + plow attachment",       # Snow showers: slight
     "Roadside de-icing required",       # Snow showers: heavy
     "Pre-trip + lightning protocol",    # Thunderstorm: slight/mod
     "Immediate shelter + inspection",   # Thunderstorm w/ slight hail
     "Post-storm forensic inspection"    # Thunderstorm w/ heavy hail
),

driver_wage_premium = c(
     0.00,  # Clear sky
     0.00,   # Mainly clear
     0.05,   # Partly cloudy (+5%)
     0.07,   # Overcast (+7%)
     0.15,   # Fog (+15%)
     0.20,   # Rime fog (+20%)
     0.10,   # Drizzle: light (+10%)
     0.12,   # Drizzle: moderate (+12%)
     0.18,   # Drizzle: dense (+18%)
     0.25,   # Freezing drizzle: light (+25%)
     0.40,   # Freezing drizzle: dense (+40%)
     0.10,   # Rain: slight (+10%)
     0.15,   # Rain: moderate (+15%)
     0.25,   # Rain: heavy (+25%)
     0.35,   # Freezing rain: light (+35%)
     0.50,   # Freezing rain: heavy (+50%)
     0.20,   # Snow fall: slight (+20%)
     0.30,   # Snow fall: moderate (+30%)
     0.45,   # Snow fall: heavy (+45%)
     0.25,   # Snow grains (+25%)
     0.12,   # Rain showers: slight (+12%)
     0.20,   # Rain showers: moderate (+20%)
     0.35,   # Rain showers: violent (+35%)
     0.30,   # Snow showers: slight (+30%)
     0.50,   # Snow showers: heavy (+50%)
     0.40,   # Thunderstorm (+40%)
     0.60,   # Thunderstorm w/ slight hail (+60%)
     0.80    # Thunderstorm w/ heavy hail (+80%)
),
  
equipment_wear_factor = c(
     1.0,   # Clear sky
     1.02,  # Mainly clear (+2%)
     1.05,  # Partly cloudy (+5%)
     1.07,  # Overcast (+7%)
     1.15,  # Fog (+15%)
     1.20,  # Rime fog (+20%)
     1.10,  # Drizzle: light (+10%)
     1.12,  # Drizzle: moderate (+12%)
     1.18,  # Drizzle: dense (+18%)
     1.25,  # Freezing drizzle: light (+25%)
     1.40,  # Freezing drizzle: dense (+40%)
     1.12,  # Rain: slight (+12%)
     1.15,  # Rain: moderate (+15%)
     1.25,  # Rain: heavy (+25%)
     1.35,  # Freezing rain: light (+35%)
     1.50,  # Freezing rain: heavy (+50%)
     1.20,  # Snow fall: slight (+20%)
     1.30,  # Snow fall: moderate (+30%)
     1.45,  # Snow fall: heavy (+45%)
     1.25,  # Snow grains (+25%)
     1.10,  # Rain showers: slight (+10%)
     1.15,  # Rain showers: moderate (+15%)
     1.30,  # Rain showers: violent (+30%)
     1.25,  # Snow showers: slight (+25%)
     1.45,  # Snow showers: heavy (+45%)
     1.35,  # Thunderstorm (+35%)
     1.60,  # Thunderstorm w/ slight hail (+60%)
     2.0    # Thunderstorm w/ heavy hail (+100%)
),
  
carbon_multiplier = c(
     1.00,  # Clear sky
     1.01,  # Mainly clear (+1%)
     1.03,  # Partly cloudy (+3%)
     1.05,  # Overcast (+5%)
     1.12,  # Fog (+12%)
     1.15,  # Rime fog (+15%)
     1.07,  # Drizzle: light (+7%)
     1.10,  # Drizzle: moderate (+10%)
     1.15,  # Drizzle: dense (+15%)
     1.22,  # Freezing drizzle: light (+22%)
     1.35,  # Freezing drizzle: dense (+35%)
     1.08,  # Rain: slight (+8%)
     1.12,  # Rain: moderate (+12%)
     1.20,  # Rain: heavy (+20%)
     1.28,  # Freezing rain: light (+28%)
     1.45,  # Freezing rain: heavy (+45%)
     1.15,  # Snow fall: slight (+15%)
     1.25,  # Snow fall: moderate (+25%)
     1.40,  # Snow fall: heavy (+40%)
     1.20,  # Snow grains (+20%)
     1.10,  # Rain showers: slight (+10%)
     1.15,  # Rain showers: moderate (+15%)
     1.30,  # Rain showers: violent (+30%)
     1.25,  # Snow showers: slight (+25%)
     1.40,  # Snow showers: heavy (+40%)
     1.35,  # Thunderstorm (+35%)
     1.55,  # Thunderstorm w/ slight hail (+55%)
     1.80   # Thunderstorm w/ heavy hail (+80%)
),

# Bridge Weight Restrictions (FHWA Load Rating Manual)
bridge_weight_limit = c(
1.00, 1.00, 0.98, 0.95, 0.90, 0.85, 0.92, 0.88, 0.82, 0.75, 0.60,
0.93, 0.87, 0.78, 0.65, 0.50, 0.85, 0.72, 0.55, 0.80, 0.91, 0.86,
0.60, 0.70, 0.45, 0.68, 0.40, 0.30
),
  
# Toll Multipliers (IBTTA 2023 Storm Surcharge Index)
toll_multiplier = c(
1.00, 1.00, 1.05, 1.07, 1.15, 1.25, 1.10, 1.15, 1.22, 1.35, 2.00,
1.12, 1.18, 1.30, 1.45, 1.80, 1.20, 1.35, 1.60, 1.25, 1.13, 1.20,
1.70, 1.40, 2.10, 1.55, 2.30, 3.00
),
  
# Border Crossing Delays (CBP TRIP Data)
border_delay_hours = c(
0.0, 0.0, 0.5, 0.7, 1.2, 2.0, 0.8, 1.1, 1.8, 2.5, 6.0,
0.9, 1.3, 2.2, 3.5, 8.0, 1.5, 2.8, 5.0, 1.7, 1.0, 1.5,
4.0, 2.5, 7.0, 3.0, 9.0, 12.0
),
  
# API Endpoints
reroute_api = c(
     NA_character_,  # Clear sky
     NA_character_,  # Mainly clear
     "HERE Weather API v3",  # Partly cloudy
     "HERE Weather API v3",  # Overcast
     "FHWA ARCHIS Live",  # Fog
     "FHWA ARCHIS Live",  # Rime fog
     "Google Maps Directions",  # Drizzle
     "Google Maps Directions",  # Drizzle
     "Google Maps Directions",  # Drizzle
     "FMCSA SMS API",  # Freezing drizzle
     "FMCSA SMS API",  # Freezing drizzle
     "USDOT NTAD",  # Rain
     "USDOT NTAD",  # Rain
     "USDOT NTAD",  # Rain
     "FMCSA SMS API",  # Freezing rain
     "FMCSA SMS API",  # Freezing rain
     "FHWA RWIS",  # Snow
     "FHWA RWIS",  # Snow
     "FHWA RWIS",  # Snow
     "USGS Streamflow",  # Snow grains
     "NOAA NOWData",  # Rain showers
     "NOAA NOWData",  # Rain showers
     "USGS Flood Events",  # Rain showers violent
     "FHWA CCAP",  # Snow showers
     "FHWA CCAP",  # Snow showers
     "NWS CAP Alerts",  # Thunderstorm
     "NWS CAP Alerts",  # Thunderstorm hail
     "DHS HSIN"  # Severe hail
)

)

create_enum_and_associate(
duckdb_con, 
"weather_code_enum", 
"weather_codes",
code_frame
)
Dropped existing ENUM type: weather_code_enum
Warning: Unknown or uninitialised column: `code`.
Created ENUM type: weather_code_enum
table setup
rTable <- tbl(duckdb_con, "weather_codes") |> collect()

locations_list = colnames(rTable)

notes_list <- list(
"WMO weather code (1-99). See WMO Publication No. 306 for official code definitions.",
"Plain-language weather condition description based on WMO standards.",
"Recommended trucking operational response per FMCSA §392.14 and industry best practices.",
"Numeric risk assessment (0-1 scale) where 0.7+ triggers DOT emergency protocols (§392.16).",
"Key FMCSA regulation sections requiring compliance during these conditions.",
"Categorical risk level: Low (<0.3), Moderate (0.3-0.5), High (0.5-0.7), Critical (0.7+).",
"Percentage increase to cargo insurance premiums during these conditions. Based on TTClub 2023 claims data.",
"Fuel consumption multiplier (1.0 = baseline). Accounts for reduced MPG in adverse conditions (EPA SmartWay data).",
"Expected delay multiplier for route planning (1.0 = no delay). Derived from FHWA Highway Performance Monitoring System.",
"FMCSA §396.11-13 mandated inspection protocols. 'Continuous monitoring' requires ELD-integrated systems.",
"Teamsters National Master Freight Agreement Article 38 hazard pay provisions. Percentages added to base pay.",
"ATA Technology & Maintenance Council wear indices. 1.0 = baseline maintenance costs.",
"EPA SmartWay GHG emission factors. Includes idling, rerouting, and traction energy impacts.",
"FHWA LRFR bridge capacity multiplier (1.0 = 80k lbs standard). Based on NBI Condition Reports.",
"IBTTA inclement weather surcharge schedule. Applies to E-ZPass/Presto toll systems.",
"CBP Trade Relief Interface Program data: Average commercial lane delays at POE.",
"Official API endpoints for real-time routing. Requires agency credentials."
)

footnotes_df <- tibble(
  notes = notes_list, 
  locations = locations_list)

calc_distinct_obs <- code_frame |>
group_by(risk_score) |>
distinct() |>
length()

pal_df <- tibble(
  cols = locations_list,
  pals = list(eval_palette("grDevices::RdYlGn", calc_distinct_obs, 'c', -1))
  #pals = list(eval_palette("basetheme::brutal", 7, 'd', 1))
)

rTable <- r_table_theming(
rTable,
title = "Weather Code: As Data Type",
subtitle = NULL,
footnotes_df,
source_note = md("**source**: World Meteorlogical Organization"),
pal_df,
multiline_feet = TRUE,
table_font_size = pct(70),
target_everything = TRUE,
color_by_columns = "risk_score",
#row_name_col = "Model"
)

WMO CODE TABLE 4677” (2025)

Table 3: How the WMO codes are associated to weather events.
Weather Code: As Data Type
weather_code1 description2 implication3 risk_score4 dot_compliance5 severity6 insurance_surcharge7 fuel_multiplier8 route_delay_factor9 safety_inspections10 driver_wage_premium11 equipment_wear_factor12 carbon_multiplier13 bridge_weight_limit14 toll_multiplier15 border_delay_hours16 reroute_api17
0 Clear sky Normal operations - No restrictions 0.10 §392.14(a) Low 0.00 1.00 1.00 Pre-trip only 0.00 1.00 1.00 1.00 1.00 0.0 NA
1 Mainly clear Normal operations - Increased vigilance 0.15 §392.14(a) Low 0.00 1.00 1.00 Pre-trip + mid-trip visual 0.00 1.02 1.01 1.00 1.00 0.0 NA
2 Partly cloudy Normal operations - Monitor weather updates 0.20 §392.14(a) Low 0.05 1.03 1.00 Pre-trip + brake check 0.05 1.05 1.03 0.98 1.05 0.5 HERE Weather API v3
3 Overcast Reduced visibility - Maintain safe following distance 0.25 §392.14(b) Low 0.07 1.05 1.01 Pre-trip + hourly tire checks 0.07 1.07 1.05 0.95 1.07 0.7 HERE Weather API v3
45 Fog Speed reduction required - Fog lights mandatory 0.40 §392.14(b)+§393.75(c) Moderate 0.10 1.12 1.05 Pre-trip + fog light checks 0.15 1.15 1.12 0.90 1.15 1.2 FHWA ARCHIS Live
48 Depositing rime fog Speed reduction required - Extreme caution 0.50 §392.14(c) Moderate 0.15 1.15 1.08 Pre-trip + 30-min interval checks 0.20 1.20 1.15 0.85 1.25 2.0 FHWA ARCHIS Live
51 Drizzle: light Potential minor delays - Road surface slickness 0.30 §392.71(a) Low 0.08 1.07 1.03 Pre-trip + 2hr brake tests 0.10 1.10 1.07 0.92 1.10 0.8 Google Maps Directions
53 Drizzle: moderate Speed restrictions - 15% reduction recommended 0.35 §392.71(b) Moderate 0.12 1.10 1.06 Pre-trip + 1hr brake tests 0.12 1.12 1.10 0.88 1.15 1.1 Google Maps Directions
55 Drizzle: dense Mandatory speed reduction - 25%+ 0.45 §392.71(c) Moderate 0.18 1.15 1.20 Pre-trip + 30min brake tests 0.18 1.18 1.15 0.82 1.22 1.8 Google Maps Directions
56 Freezing drizzle: light Chain requirement - Level 1 traction advisory 0.55 §392.16(a) High 0.25 1.25 1.25 Pre-trip + axle temp monitoring 0.25 1.25 1.22 0.75 1.35 2.5 FMCSA SMS API
57 Freezing drizzle: dense Road closure likely - Avoid non-essential travel 0.80 §392.16(c) Critical 0.40 1.40 1.40 Continuous monitoring required 0.40 1.40 1.35 0.60 2.00 6.0 FMCSA SMS API
61 Rain: slight Increased stopping distance - 10% speed reduction 0.30 §392.71(a) Low 0.10 1.08 1.04 Pre-trip + wiper checks 0.10 1.12 1.08 0.93 1.12 0.9 USDOT NTAD
63 Rain: moderate 15-20% speed reduction - Check tire tread 0.40 §392.71(b) Moderate 0.15 1.12 1.08 Pre-trip + 2hr wiper checks 0.15 1.15 1.12 0.87 1.18 1.3 USDOT NTAD
65 Rain: heavy 25%+ speed reduction - Possible detour routing 0.60 §392.71(c) High 0.25 1.20 1.25 Pre-trip + 30min wiper checks 0.25 1.25 1.20 0.78 1.30 2.2 USDOT NTAD
66 Freezing rain: light Mandatory chains - Temperature monitoring 0.65 §392.16(b)+§393.95(d) High 0.35 1.30 1.30 Pre-trip + chain integrity checks 0.35 1.35 1.28 0.65 1.45 3.5 FMCSA SMS API
67 Freezing rain: heavy Road closure imminent - Immediate stop advised 0.85 §392.16(c) Critical 0.50 1.50 1.50 Roadside inspections mandatory 0.50 1.50 1.45 0.50 1.80 8.0 FMCSA SMS API
71 Snow fall: slight 15% speed reduction - Traction control engaged 0.40 §392.14(b)+§393.95(a) Moderate 0.20 1.15 1.20 Pre-trip + tire chain prep 0.20 1.20 1.15 0.85 1.20 1.5 FHWA RWIS
73 Snow fall: moderate 25% speed reduction - Chain requirement possible 0.60 §392.14(c)+§393.95(b) High 0.30 1.25 1.30 Pre-trip + hourly chain checks 0.30 1.30 1.25 0.72 1.35 2.8 FHWA RWIS
75 Snow fall: heavy Road closure likely - Abandon shipment staging 0.75 §392.16(c) Critical 0.45 1.40 1.45 Continuous chain monitoring 0.45 1.45 1.40 0.55 1.60 5.0 FHWA RWIS
77 Snow grains Speed restriction - Watch for black ice 0.50 §392.14(c) Moderate 0.25 1.20 1.25 Pre-trip + sanding required 0.25 1.25 1.20 0.80 1.25 1.7 USGS Streamflow
80 Rain showers: slight Increased following distance - 4-second rule 0.35 §392.14(b) Moderate 0.12 1.10 1.05 Pre-trip + drainage checks 0.12 1.10 1.10 0.91 1.13 1.0 NOAA NOWData
81 Rain showers: moderate 20% speed reduction - Avoid lane changes 0.50 §392.14(c) Moderate 0.20 1.15 1.20 Pre-trip + undercarriage checks 0.20 1.15 1.15 0.86 1.20 1.5 NOAA NOWData
82 Rain showers: violent Immediate parking advised - Flash flood risk 0.70 §392.16(c) High 0.35 1.30 1.35 Abort trip + full inspection 0.35 1.30 1.30 0.60 1.70 4.0 USGS Flood Events
85 Snow showers: slight Chain requirement - Trailer brake check 0.60 §393.95(c) High 0.30 1.25 1.30 Pre-trip + plow attachment 0.30 1.25 1.25 0.70 1.40 2.5 FHWA CCAP
86 Snow showers: heavy Road closure protocol activated 0.80 §392.16(c) Critical 0.50 1.45 1.50 Roadside de-icing required 0.50 1.45 1.40 0.45 2.10 7.0 FHWA CCAP
95 Thunderstorm: slight or moderate Delay shipments - No open-top trailers 0.65 §392.14(d)+§393.75(e) High 0.40 1.35 1.30 Pre-trip + lightning protocol 0.40 1.35 1.35 0.68 1.55 3.0 NWS CAP Alerts
96 Thunderstorm with slight hail Immediate stop - Seek shelter 0.85 §392.16(c) Critical 0.60 1.60 1.60 Immediate shelter + inspection 0.60 1.60 1.55 0.40 2.30 9.0 NWS CAP Alerts
99 Thunderstorm with heavy hail Catastrophic risk - Emergency protocols 0.95 §392.16(e) Critical 0.80 2.00 2.00 Post-storm forensic inspection 0.80 2.00 1.80 0.30 3.00 12.0 DHS HSIN
source: World Meteorlogical Organization
1 WMO weather code (1-99). See WMO Publication No. 306 for official code definitions.
2 Plain-language weather condition description based on WMO standards.
3 Recommended trucking operational response per FMCSA §392.14 and industry best practices.
4 Numeric risk assessment (0-1 scale) where 0.7+ triggers DOT emergency protocols (§392.16).
5 Key FMCSA regulation sections requiring compliance during these conditions.
6 Categorical risk level: Low (<0.3), Moderate (0.3-0.5), High (0.5-0.7), Critical (0.7+).
7 Percentage increase to cargo insurance premiums during these conditions. Based on TTClub 2023 claims data.
8 Fuel consumption multiplier (1.0 = baseline). Accounts for reduced MPG in adverse conditions (EPA SmartWay data).
9 Expected delay multiplier for route planning (1.0 = no delay). Derived from FHWA Highway Performance Monitoring System.
10 FMCSA §396.11-13 mandated inspection protocols. 'Continuous monitoring' requires ELD-integrated systems.
11 Teamsters National Master Freight Agreement Article 38 hazard pay provisions. Percentages added to base pay.
12 ATA Technology & Maintenance Council wear indices. 1.0 = baseline maintenance costs.
13 EPA SmartWay GHG emission factors. Includes idling, rerouting, and traction energy impacts.
14 FHWA LRFR bridge capacity multiplier (1.0 = 80k lbs standard). Based on NBI Condition Reports.
15 IBTTA inclement weather surcharge schedule. Applies to E-ZPass/Presto toll systems.
16 CBP Trade Relief Interface Program data: Average commercial lane delays at POE.
17 Official API endpoints for real-time routing. Requires agency credentials.
Code
-- Create ENUM for wind direction
CREATE TYPE cardinal_direction_enum AS ENUM (
     'N', 
     'NE', 
     'E', 
     'SE', 
     'S', 
     'SW', 
     'W', 
     'NW'
);

CREATE TYPE month_name_enum AS ENUM (
     'January', 
     'February', 
     'March', 
     'April', 
     'May',
     'June', 
     'July', 
     'August', 
     'September', 
     'October', 
     'November', 
     'December'
);

CREATE TYPE month_abb_enum AS ENUM (
     'Jan', 
     'Feb', 
     'Mar', 
     'Apr', 
     'May',
     'Jun', 
     'Jul', 
     'Aug', 
     'Sep', 
     'Oct', 
     'Nov', 
     'Dec'
);

CREATE TYPE weekday_name_enum AS ENUM (
     'Sunday', 
     'Monday', 
     'Tuesday', 
     'Wednesday', 
     'Thursday', 
     'Friday', 
     'Saturday'
);

CREATE TYPE weekday_abb_enum AS ENUM (
     'Sun', 
     'Mon', 
     'Tue', 
     'Wed', 
     'Thu', 
     'Fri', 
     'Sat'
);

CREATE TYPE visibility_cat_enum AS ENUM (
     'Clearest (>30 km)', 
     'Excellent (10-30 km)', 
     'Good (5-10 km)', 
     'Moderate (2-5 km)', 
     'Low (1-2 km)', 
     'Fog/Haze (<1 km)'
  );
  
CREATE TYPE speed_bin_enum AS ENUM (
     '0-2', 
     '2-4', 
     '4-6', 
     '6-8', 
     '8-10', 
     '10+'
     );

Transformation with Validation

Stages:

  • Cleaning (numeric formatting, type casting)

  • Feature engineering (wind bins, direction calculations)

  • Temporal decomposition (date/time elements extraction)

  • Categorical labeling (visibility categories, enum mapping)

Transformation

Transformation

Dataset: Forecast, Next Day

Views enhance transformation safety by acting as virtual tables, processing data dynamically without storing intermediates or risking source corruption. They enable iterative logic refinement, avoiding table rewrites. DuckDB optimizes view queries through computation pushdown, boosting efficiency. Self-documenting views clarify transformation logic, fostering collaboration and maintenance

#| label: typeCastWeatherCode
#| connection: duckdb_con

CREATE OR REPLACE TABLE forecast_data AS
SELECT 
     *,
     weather_code::INTEGER::TEXT AS weather_code
FROM 
     forecast_data;
Modular SQL, in-database transformation
-- Create or replace the view with modular CTE's and explicit column lists
CREATE OR REPLACE VIEW transformed_forecast AS
WITH cleaned_data AS (
  SELECT
    date,
    ROUND(temperature_2m::FLOAT, 1) AS temperature_2m,
    precipitation_probability,
    ROUND(precipitation::FLOAT, 3) AS precipitation,
    ROUND(rain::FLOAT, 3) AS rain,
    ROUND(showers::FLOAT, 3) AS showers,
    ROUND(snowfall::FLOAT, 3) AS snowfall,
    ROUND(snow_depth::FLOAT, 3) AS snow_depth,
    weather_code,
    ROUND(visibility::FLOAT, 1) AS visibility,
    ROUND(wind_speed_10m::FLOAT, 2) AS wind_speed_10m,
    wind_direction_10m,
    latitude,
    longitude
  FROM forecast_data
),

transformed_data AS (
  SELECT
    *,
    -- Speed bin
    CASE 
      WHEN wind_speed_10m <= 2 THEN CAST('0-2' AS speed_bin_enum)
      WHEN wind_speed_10m <= 4 THEN CAST('2-4' AS speed_bin_enum)
      WHEN wind_speed_10m <= 6 THEN CAST('4-6' AS speed_bin_enum)
      WHEN wind_speed_10m <= 8 THEN CAST('6-8' AS speed_bin_enum)
      WHEN wind_speed_10m <= 10 THEN CAST('8-10' AS speed_bin_enum)
      ELSE CAST('10+' AS speed_bin_enum)
    END AS speed_bin,
    -- Cardinal direction
    CASE 
      WHEN wind_direction_10m BETWEEN 0 AND 22.5 THEN CAST('N' AS cardinal_direction_enum)
      WHEN wind_direction_10m BETWEEN 22.5 AND 67.5 THEN CAST('NE' AS cardinal_direction_enum)
      WHEN wind_direction_10m BETWEEN 67.5 AND 112.5 THEN CAST('E' AS cardinal_direction_enum)
      WHEN wind_direction_10m BETWEEN 112.5 AND 157.5 THEN CAST('SE' AS cardinal_direction_enum)
      WHEN wind_direction_10m BETWEEN 157.5 AND 202.5 THEN CAST('S' AS cardinal_direction_enum)
      WHEN wind_direction_10m BETWEEN 202.5 AND 247.5 THEN CAST('SW' AS cardinal_direction_enum)
      WHEN wind_direction_10m BETWEEN 247.5 AND 292.5 THEN CAST('W' AS cardinal_direction_enum)
      WHEN wind_direction_10m BETWEEN 292.5 AND 337.5 THEN CAST('NW' AS cardinal_direction_enum)
      WHEN wind_direction_10m BETWEEN 337.5 AND 360 THEN CAST('N' AS cardinal_direction_enum)
      ELSE NULL
    END AS wind_direction_cardinal,
    -- 15-degree direction bin (numeric)
    FLOOR((wind_direction_10m - 1e-9) / 15) * 15 AS direction_bin
  FROM cleaned_data
),

final_data AS (
  SELECT
    *,
    -- Direction angle
    CASE
      WHEN wind_direction_cardinal = 'N' THEN 0
      WHEN wind_direction_cardinal = 'NE' THEN 45
      WHEN wind_direction_cardinal = 'E' THEN 90
      WHEN wind_direction_cardinal = 'SE' THEN 135
      WHEN wind_direction_cardinal = 'S' THEN 180
      WHEN wind_direction_cardinal = 'SW' THEN 225
      WHEN wind_direction_cardinal = 'W' THEN 270
      WHEN wind_direction_cardinal = 'NW' THEN 315
      ELSE NULL
    END AS direction_angle,
    -- Visibility category
    CASE
      WHEN visibility > 30000 THEN CAST('Clearest (>30 km)' AS visibility_cat_enum)
      WHEN visibility > 10000 THEN CAST('Excellent (10-30 km)' AS visibility_cat_enum)
      WHEN visibility > 5000 THEN CAST('Good (5-10 km)' AS visibility_cat_enum)
      WHEN visibility > 2000 THEN CAST('Moderate (2-5 km)' AS visibility_cat_enum)
      WHEN visibility > 1000 THEN CAST('Low (1-2 km)' AS visibility_cat_enum)
      WHEN visibility <= 1000 THEN CAST('Fog/Haze (<1 km)' AS visibility_cat_enum)
      ELSE NULL
    END AS visibility_category,
    -- Date parts
    strftime(date, '%Y-%m-%d') AS date_only,
    EXTRACT(YEAR FROM date) AS year,
    EXTRACT(MONTH FROM date) AS month,
    EXTRACT(hour FROM date) AS hour,
    monthname(date)::month_name_enum AS month_name,
    strftime(date, '%b')::month_abb_enum AS month_abb,
    EXTRACT(DAY FROM date) AS day,
    dayname(date)::weekday_name_enum AS weekday_name,
    strftime(date, '%a')::weekday_abb_enum AS weekday_abb,
    strftime(date, '%b %d') AS month_day,
    strftime(date, '%H:%M:%S') AS time_only,
    strptime('1970-01-01 ' || strftime(date, '%H:%M:%S'), '%Y-%m-%d %H:%M:%S') AS common_date
  FROM transformed_data
)

-- Final output
SELECT * FROM final_data;
Code
SELECT * FROM transformed_forecast;
table setup
r_df <- viewOfForecast |>
dplyr::mutate(
     date = as.character(date),
     common_date = as.character(common_date)
)

locations_list = colnames(r_df)

notes_list <-c(
  "Date of the recorded data.",
  "Temperature at 2 meters above ground.",
  "Probability of precipitation.",
  "Amount of precipitation.",
  "Amount of rain.",
  "Amount of showers.",
  "Amount of snowfall.",
  "Depth of snow.",
  "Code representing the weather condition.",
  "Visibility distance.",
  "Wind speed at 10 meters above ground.",
  "Wind direction at 10 meters above ground.",
  "Vertical location coordinate.", 
  "Horizontal location coordinate.",
  "Binned categories for wind speed.",
  "Cardinal direction of the wind.",
  "Binned categories for wind direction.",
  "Numeric angle representing wind direction.",
  "Categorized visibility levels.",
  "Date without time",
  "Year extracted from the date.",
  "Month extracted from the date.",
  "Hour extracted from the date.",
  "Name of the month.",
  "Abbreviated name of the month.",
  "Day extracted from the date.",
  "Name of the weekday.",
  "Abbreviated name of the weekday.",
  "Combined month and day.",
  "Time extracted from the date.",
  "Common date format for time-based analysis."
)

footnotes_df <- tibble(
  notes = notes_list, 
  locations = locations_list
)

pal_df <- tibble(
  cols = locations_list,
  pals = list(eval_palette("grDevices::Rocket", 10 , 'c', 1))
)

rTable <- r_table_theming(
r_df,
title = "Forecast Data Preview",
subtitle = NULL,
footnotes_df,
source_note = md("**source**: "),
pal_df,
footnotes_multiline = FALSE,
table_font_size = pct(70),
#do_col_labels = TRUE,
)
Table 4
Forecast Data Preview
date1 temperature_2m2 precipitation_probability3 precipitation4 rain5 showers6 snowfall7 snow_depth8 weather_code9 visibility10 wind_speed_10m11 wind_direction_10m12 latitude13 longitude14 speed_bin15 wind_direction_cardinal16 direction_bin17 direction_angle18 visibility_category19 date_only20 year21 month22 hour23 month_name24 month_abb25 day26 weekday_name27 weekday_abb28 month_day29 time_only30 common_date31
2025-04-05 22:00:00 44.9 45 0.000 0.000 0 0 0 3 46587.9 9.62 360.000000 38.7480 -90.4390 8-10 N 345 0 Clearest (>30 km) 2025-04-05 2025 4 22 April Apr 5 Saturday Sat Apr 05 22:00:00 1970-01-01 22:00:00
2025-04-05 23:00:00 44.7 16 0.012 0.012 0 0 0 51 45275.6 10.08 357.455231 38.7480 -90.4390 10+ N 345 0 Clearest (>30 km) 2025-04-05 2025 4 23 April Apr 5 Saturday Sat Apr 05 23:00:00 1970-01-01 23:00:00
2025-04-06 44.5 17 0.000 0.000 0 0 0 3 47900.3 11.11 350.727478 38.7480 -90.4390 10+ N 345 0 Clearest (>30 km) 2025-04-06 2025 4 0 April Apr 6 Sunday Sun Apr 06 00:00:00 1970-01-01
2025-04-06 01:00:00 44.7 4 0.000 0.000 0 0 0 3 50524.9 7.55 348.023895 38.7480 -90.4390 6-8 N 345 0 Clearest (>30 km) 2025-04-06 2025 4 1 April Apr 6 Sunday Sun Apr 06 01:00:00 1970-01-01 01:00:00
2025-04-06 02:00:00 44.7 3 0.000 0.000 0 0 0 3 53477.7 5.88 351.253906 38.7480 -90.4390 4-6 N 345 0 Clearest (>30 km) 2025-04-06 2025 4 2 April Apr 6 Sunday Sun Apr 06 02:00:00 1970-01-01 02:00:00
2025-04-06 03:00:00 44.7 3 0.004 0.004 0 0 0 51 49868.8 2.47 354.805664 38.7480 -90.4390 2-4 N 345 0 Clearest (>30 km) 2025-04-06 2025 4 3 April Apr 6 Sunday Sun Apr 06 03:00:00 1970-01-01 03:00:00
2025-04-06 04:00:00 43.6 3 0.004 0.004 0 0 0 51 58727.0 11.02 347.092590 38.7480 -90.4390 10+ N 345 0 Clearest (>30 km) 2025-04-06 2025 4 4 April Apr 6 Sunday Sun Apr 06 04:00:00 1970-01-01 04:00:00
2025-04-06 05:00:00 42.2 5 0.000 0.000 0 0 0 3 66273.0 9.63 357.337067 38.7480 -90.4390 8-10 N 345 0 Clearest (>30 km) 2025-04-06 2025 4 5 April Apr 6 Sunday Sun Apr 06 05:00:00 1970-01-01 05:00:00
2025-04-06 06:00:00 42.2 7 0.000 0.000 0 0 0 3 61023.6 6.11 8.426887 38.7480 -90.4390 6-8 N 0 0 Clearest (>30 km) 2025-04-06 2025 4 6 April Apr 6 Sunday Sun Apr 06 06:00:00 1970-01-01 06:00:00
2025-04-06 07:00:00 41.9 7 0.000 0.000 0 0 0 3 61351.7 9.64 3.990843 38.7480 -90.4390 8-10 N 0 0 Clearest (>30 km) 2025-04-06 2025 4 7 April Apr 6 Sunday Sun Apr 06 07:00:00 1970-01-01 07:00:00
2025-04-06 08:00:00 41.4 7 0.000 0.000 0 0 0 3 60039.4 8.72 360.000000 38.7480 -90.4390 8-10 N 345 0 Clearest (>30 km) 2025-04-06 2025 4 8 April Apr 6 Sunday Sun Apr 06 08:00:00 1970-01-01 08:00:00
2025-04-06 09:00:00 40.7 5 0.000 0.000 0 0 0 3 61023.6 8.35 7.695961 38.7480 -90.4390 8-10 N 0 0 Clearest (>30 km) 2025-04-06 2025 4 9 April Apr 6 Sunday Sun Apr 06 09:00:00 1970-01-01 09:00:00
2025-04-06 10:00:00 40.0 8 0.000 0.000 0 0 0 3 60039.4 9.62 360.000000 38.7480 -90.4390 8-10 N 345 0 Clearest (>30 km) 2025-04-06 2025 4 10 April Apr 6 Sunday Sun Apr 06 10:00:00 1970-01-01 10:00:00
2025-04-06 11:00:00 39.4 7 0.000 0.000 0 0 0 3 57742.8 10.41 351.347534 38.7480 -90.4390 10+ N 345 0 Clearest (>30 km) 2025-04-06 2025 4 11 April Apr 6 Sunday Sun Apr 06 11:00:00 1970-01-01 11:00:00
2025-04-06 12:00:00 39.2 7 0.000 0.000 0 0 0 3 56102.4 10.57 6.072371 38.7480 -90.4390 10+ N 0 0 Clearest (>30 km) 2025-04-06 2025 4 12 April Apr 6 Sunday Sun Apr 06 12:00:00 1970-01-01 12:00:00
2025-04-06 13:00:00 39.4 7 0.000 0.000 0 0 0 3 54133.9 8.64 21.250580 38.7480 -90.4390 8-10 N 15 0 Clearest (>30 km) 2025-04-06 2025 4 13 April Apr 6 Sunday Sun Apr 06 13:00:00 1970-01-01 13:00:00
2025-04-06 14:00:00 40.2 9 0.000 0.000 0 0 0 3 56102.4 8.82 35.706783 38.7480 -90.4390 8-10 NE 30 45 Clearest (>30 km) 2025-04-06 2025 4 14 April Apr 6 Sunday Sun Apr 06 14:00:00 1970-01-01 14:00:00
2025-04-06 15:00:00 44.8 11 0.000 0.000 0 0 0 3 75787.4 9.66 42.184383 38.7480 -90.4390 8-10 NE 30 45 Clearest (>30 km) 2025-04-06 2025 4 15 April Apr 6 Sunday Sun Apr 06 15:00:00 1970-01-01 15:00:00
2025-04-06 16:00:00 44.6 9 0.000 0.000 0 0 0 3 75131.2 8.59 38.659828 38.7480 -90.4390 8-10 NE 30 45 Clearest (>30 km) 2025-04-06 2025 4 16 April Apr 6 Sunday Sun Apr 06 16:00:00 1970-01-01 16:00:00
2025-04-06 17:00:00 45.8 5 0.000 0.000 0 0 0 3 77755.9 10.76 20.695532 38.7480 -90.4390 10+ N 15 0 Clearest (>30 km) 2025-04-06 2025 4 17 April Apr 6 Sunday Sun Apr 06 17:00:00 1970-01-01 17:00:00
2025-04-06 18:00:00 45.4 4 0.000 0.000 0 0 0 3 82021.0 8.85 20.725639 38.7480 -90.4390 8-10 N 15 0 Clearest (>30 km) 2025-04-06 2025 4 18 April Apr 6 Sunday Sun Apr 06 18:00:00 1970-01-01 18:00:00
2025-04-06 19:00:00 46.6 7 0.000 0.000 0 0 0 3 84317.6 11.04 17.700521 38.7480 -90.4390 10+ N 15 0 Clearest (>30 km) 2025-04-06 2025 4 19 April Apr 6 Sunday Sun Apr 06 19:00:00 1970-01-01 19:00:00
2025-04-06 20:00:00 45.7 5 0.000 0.000 0 0 0 3 82021.0 8.10 6.340100 38.7480 -90.4390 8-10 N 0 0 Clearest (>30 km) 2025-04-06 2025 4 20 April Apr 6 Sunday Sun Apr 06 20:00:00 1970-01-01 20:00:00
2025-04-06 21:00:00 48.4 6 0.000 0.000 0 0 0 3 94160.1 9.00 26.564985 38.7480 -90.4390 8-10 NE 15 45 Clearest (>30 km) 2025-04-06 2025 4 21 April Apr 6 Sunday Sun Apr 06 21:00:00 1970-01-01 21:00:00
2025-04-06 22:00:00 47.9 7 0.000 0.000 0 0 0 3 88910.8 8.03 12.875007 38.7480 -90.4390 8-10 N 0 0 Clearest (>30 km) 2025-04-06 2025 4 22 April Apr 6 Sunday Sun Apr 06 22:00:00 1970-01-01 22:00:00
2025-04-06 23:00:00 47.5 7 0.000 0.000 0 0 0 3 80052.5 6.30 16.504446 38.7480 -90.4390 6-8 N 15 0 Clearest (>30 km) 2025-04-06 2025 4 23 April Apr 6 Sunday Sun Apr 06 23:00:00 1970-01-01 23:00:00
2025-04-07 45.9 5 0.000 0.000 0 0 0 3 65944.9 4.03 360.000000 38.7480 -90.4390 4-6 N 345 0 Clearest (>30 km) 2025-04-07 2025 4 0 April Apr 7 Monday Mon Apr 07 00:00:00 1970-01-01
2025-04-07 01:00:00 45.0 3 0.000 0.000 0 0 0 3 58398.9 2.24 360.000000 38.7480 -90.4390 2-4 N 345 0 Clearest (>30 km) 2025-04-07 2025 4 1 April Apr 7 Monday Mon Apr 07 01:00:00 1970-01-01 01:00:00
2025-04-07 02:00:00 44.6 2 0.000 0.000 0 0 0 3 55118.1 2.25 354.289490 38.7480 -90.4390 2-4 N 345 0 Clearest (>30 km) 2025-04-07 2025 4 2 April Apr 7 Monday Mon Apr 07 02:00:00 1970-01-01 02:00:00
2025-04-07 03:00:00 44.3 2 0.000 0.000 0 0 0 3 56430.4 1.50 26.564985 38.7480 -90.4390 0-2 NE 15 45 Clearest (>30 km) 2025-04-07 2025 4 3 April Apr 7 Monday Mon Apr 07 03:00:00 1970-01-01 03:00:00
2025-04-05 22:00:00 45.1 35 0.039 0.039 0 0 0 55 5577.4 8.13 58.495792 40.7128 -74.0060 8-10 NE 45 45 Good (5-10 km) 2025-04-05 2025 4 22 April Apr 5 Saturday Sat Apr 05 22:00:00 1970-01-01 22:00:00
2025-04-05 23:00:00 44.7 23 0.000 0.000 0 0 0 3 9842.5 8.25 40.601215 40.7128 -74.0060 8-10 NE 30 45 Good (5-10 km) 2025-04-05 2025 4 23 April Apr 5 Saturday Sat Apr 05 23:00:00 1970-01-01 23:00:00
2025-04-06 44.7 5 0.000 0.000 0 0 0 3 42979.0 10.02 29.427368 40.7128 -74.0060 10+ NE 15 45 Clearest (>30 km) 2025-04-06 2025 4 0 April Apr 6 Sunday Sun Apr 06 00:00:00 1970-01-01
2025-04-06 01:00:00 43.4 2 0.000 0.000 0 0 0 3 40026.2 8.77 19.359097 40.7128 -74.0060 8-10 N 15 0 Clearest (>30 km) 2025-04-06 2025 4 1 April Apr 6 Sunday Sun Apr 06 01:00:00 1970-01-01 01:00:00
2025-04-06 02:00:00 42.7 6 0.000 0.000 0 0 0 3 40026.2 9.68 33.690102 40.7128 -74.0060 8-10 NE 30 45 Clearest (>30 km) 2025-04-06 2025 4 2 April Apr 6 Sunday Sun Apr 06 02:00:00 1970-01-01 02:00:00
2025-04-06 03:00:00 42.0 8 0.000 0.000 0 0 0 3 40026.2 9.30 27.181028 40.7128 -74.0060 8-10 NE 15 45 Clearest (>30 km) 2025-04-06 2025 4 3 April Apr 6 Sunday Sun Apr 06 03:00:00 1970-01-01 03:00:00
2025-04-06 04:00:00 42.8 18 0.000 0.000 0 0 0 3 40026.2 6.71 36.869980 40.7128 -74.0060 6-8 NE 30 45 Clearest (>30 km) 2025-04-06 2025 4 4 April Apr 6 Sunday Sun Apr 06 04:00:00 1970-01-01 04:00:00
2025-04-06 05:00:00 43.0 49 0.000 0.000 0 0 0 3 40026.2 6.19 49.398785 40.7128 -74.0060 6-8 NE 45 45 Clearest (>30 km) 2025-04-06 2025 4 5 April Apr 6 Sunday Sun Apr 06 05:00:00 1970-01-01 05:00:00
2025-04-06 06:00:00 43.5 63 0.228 0.228 0 0 0 63 4265.1 1.90 135.000107 40.7128 -74.0060 0-2 SE 135 135 Moderate (2-5 km) 2025-04-06 2025 4 6 April Apr 6 Sunday Sun Apr 06 06:00:00 1970-01-01 06:00:00
2025-04-06 07:00:00 43.8 42 0.173 0.173 0 0 0 63 40026.2 1.75 309.805511 40.7128 -74.0060 0-2 NW 300 315 Clearest (>30 km) 2025-04-06 2025 4 7 April Apr 6 Sunday Sun Apr 06 07:00:00 1970-01-01 07:00:00
2025-04-06 08:00:00 44.3 28 0.000 0.000 0 0 0 3 40026.2 2.00 26.564985 40.7128 -74.0060 0-2 NE 15 45 Clearest (>30 km) 2025-04-06 2025 4 8 April Apr 6 Sunday Sun Apr 06 08:00:00 1970-01-01 08:00:00
2025-04-06 09:00:00 44.6 15 0.000 0.000 0 0 0 3 40026.2 2.20 66.037506 40.7128 -74.0060 2-4 NE 60 45 Clearest (>30 km) 2025-04-06 2025 4 9 April Apr 6 Sunday Sun Apr 06 09:00:00 1970-01-01 09:00:00
2025-04-06 10:00:00 44.8 13 0.004 0.004 0 0 0 51 40026.2 0.81 146.309906 40.7128 -74.0060 0-2 SE 135 135 Clearest (>30 km) 2025-04-06 2025 4 10 April Apr 6 Sunday Sun Apr 06 10:00:00 1970-01-01 10:00:00
2025-04-06 11:00:00 45.0 19 0.004 0.004 0 0 0 51 40026.2 3.14 4.085537 40.7128 -74.0060 2-4 N 0 0 Clearest (>30 km) 2025-04-06 2025 4 11 April Apr 6 Sunday Sun Apr 06 11:00:00 1970-01-01 11:00:00
2025-04-06 12:00:00 45.4 16 0.000 0.000 0 0 0 3 40026.2 6.00 333.435028 40.7128 -74.0060 4-6 NW 330 315 Clearest (>30 km) 2025-04-06 2025 4 12 April Apr 6 Sunday Sun Apr 06 12:00:00 1970-01-01 12:00:00
2025-04-06 13:00:00 45.6 25 0.000 0.000 0 0 0 3 40026.2 5.52 338.629303 40.7128 -74.0060 4-6 N 330 0 Clearest (>30 km) 2025-04-06 2025 4 13 April Apr 6 Sunday Sun Apr 06 13:00:00 1970-01-01 13:00:00
2025-04-06 14:00:00 46.5 31 0.000 0.000 0 0 0 3 40026.2 4.43 315.000092 40.7128 -74.0060 4-6 NW 315 315 Clearest (>30 km) 2025-04-06 2025 4 14 April Apr 6 Sunday Sun Apr 06 14:00:00 1970-01-01 14:00:00
2025-04-06 15:00:00 48.5 41 0.000 0.000 0 0 0 3 41010.5 8.36 344.475830 40.7128 -74.0060 8-10 N 330 0 Clearest (>30 km) 2025-04-06 2025 4 15 April Apr 6 Sunday Sun Apr 06 15:00:00 1970-01-01 15:00:00
2025-04-06 16:00:00 49.2 28 0.000 0.000 0 0 0 3 48556.4 8.95 360.000000 40.7128 -74.0060 8-10 N 345 0 Clearest (>30 km) 2025-04-06 2025 4 16 April Apr 6 Sunday Sun Apr 06 16:00:00 1970-01-01 16:00:00
2025-04-06 17:00:00 49.6 14 0.024 0.024 0 0 0 53 44291.3 7.38 345.963715 40.7128 -74.0060 6-8 N 345 0 Clearest (>30 km) 2025-04-06 2025 4 17 April Apr 6 Sunday Sun Apr 06 17:00:00 1970-01-01 17:00:00
2025-04-06 18:00:00 49.1 14 0.047 0.047 0 0 0 55 42650.9 7.02 300.650574 40.7128 -74.0060 6-8 NW 300 315 Clearest (>30 km) 2025-04-06 2025 4 18 April Apr 6 Sunday Sun Apr 06 18:00:00 1970-01-01 18:00:00
2025-04-06 19:00:00 50.4 8 0.008 0.008 0 0 0 51 53477.7 8.82 305.706787 40.7128 -74.0060 8-10 NW 300 315 Clearest (>30 km) 2025-04-06 2025 4 19 April Apr 6 Sunday Sun Apr 06 19:00:00 1970-01-01 19:00:00
2025-04-06 20:00:00 49.2 11 0.008 0.008 0 0 0 51 56102.4 7.33 301.263672 40.7128 -74.0060 6-8 NW 300 315 Clearest (>30 km) 2025-04-06 2025 4 20 April Apr 6 Sunday Sun Apr 06 20:00:00 1970-01-01 20:00:00
2025-04-06 21:00:00 48.2 6 0.000 0.000 0 0 0 3 67913.4 7.33 301.263672 40.7128 -74.0060 6-8 NW 300 315 Clearest (>30 km) 2025-04-06 2025 4 21 April Apr 6 Sunday Sun Apr 06 21:00:00 1970-01-01 21:00:00
2025-04-06 22:00:00 47.3 5 0.000 0.000 0 0 0 3 71522.3 6.53 308.047089 40.7128 -74.0060 6-8 NW 300 315 Clearest (>30 km) 2025-04-06 2025 4 22 April Apr 6 Sunday Sun Apr 06 22:00:00 1970-01-01 22:00:00
2025-04-06 23:00:00 45.5 9 0.000 0.000 0 0 0 3 66929.1 8.95 323.130005 40.7128 -74.0060 8-10 NW 315 315 Clearest (>30 km) 2025-04-06 2025 4 23 April Apr 6 Sunday Sun Apr 06 23:00:00 1970-01-01 23:00:00
2025-04-07 45.1 15 0.000 0.000 0 0 0 3 58070.9 3.33 289.653900 40.7128 -74.0060 2-4 W 285 270 Clearest (>30 km) 2025-04-07 2025 4 0 April Apr 7 Monday Mon Apr 07 00:00:00 1970-01-01
2025-04-07 01:00:00 45.2 16 0.016 0.016 0 0 0 51 56758.5 3.41 293.198608 40.7128 -74.0060 2-4 NW 285 315 Clearest (>30 km) 2025-04-07 2025 4 1 April Apr 7 Monday Mon Apr 07 01:00:00 1970-01-01 01:00:00
2025-04-07 02:00:00 44.8 20 0.016 0.016 0 0 0 51 55118.1 4.27 312.878906 40.7128 -74.0060 4-6 NW 300 315 Clearest (>30 km) 2025-04-07 2025 4 2 April Apr 7 Monday Mon Apr 07 02:00:00 1970-01-01 02:00:00
2025-04-07 03:00:00 44.5 31 0.000 0.000 0 0 0 3 54133.9 5.45 340.820892 40.7128 -74.0060 4-6 N 330 0 Clearest (>30 km) 2025-04-07 2025 4 3 April Apr 7 Monday Mon Apr 07 03:00:00 1970-01-01 03:00:00
2025-04-05 22:00:00 82.0 0 0.000 0.000 0 0 0 0 295275.6 6.99 7.352293 34.0522 -118.2437 6-8 N 0 0 Clearest (>30 km) 2025-04-05 2025 4 22 April Apr 5 Saturday Sat Apr 05 22:00:00 1970-01-01 22:00:00
2025-04-05 23:00:00 81.4 0 0.000 0.000 0 0 0 0 295275.6 7.73 247.890503 34.0522 -118.2437 6-8 W 240 270 Clearest (>30 km) 2025-04-05 2025 4 23 April Apr 5 Saturday Sat Apr 05 23:00:00 1970-01-01 23:00:00
2025-04-06 74.5 0 0.000 0.000 0 0 0 0 218175.9 15.62 240.865799 34.0522 -118.2437 10+ SW 240 225 Clearest (>30 km) 2025-04-06 2025 4 0 April Apr 6 Sunday Sun Apr 06 00:00:00 1970-01-01
2025-04-06 01:00:00 71.8 0 0.000 0.000 0 0 0 0 191929.1 11.18 253.739716 34.0522 -118.2437 10+ W 240 270 Clearest (>30 km) 2025-04-06 2025 4 1 April Apr 6 Sunday Sun Apr 06 01:00:00 1970-01-01 01:00:00
2025-04-06 02:00:00 70.1 0 0.000 0.000 0 0 0 0 194225.7 8.07 250.559875 34.0522 -118.2437 8-10 W 240 270 Clearest (>30 km) 2025-04-06 2025 4 2 April Apr 6 Sunday Sun Apr 06 02:00:00 1970-01-01 02:00:00
2025-04-06 03:00:00 65.2 0 0.000 0.000 0 0 0 0 202427.8 5.48 281.768250 34.0522 -118.2437 4-6 W 270 270 Clearest (>30 km) 2025-04-06 2025 4 3 April Apr 6 Sunday Sun Apr 06 03:00:00 1970-01-01 03:00:00
2025-04-06 04:00:00 62.4 0 0.000 0.000 0 0 0 0 191929.1 5.19 277.431305 34.0522 -118.2437 4-6 W 270 270 Clearest (>30 km) 2025-04-06 2025 4 4 April Apr 6 Sunday Sun Apr 06 04:00:00 1970-01-01 04:00:00
2025-04-06 05:00:00 60.5 0 0.000 0.000 0 0 0 0 187992.1 3.82 290.556122 34.0522 -118.2437 2-4 W 285 270 Clearest (>30 km) 2025-04-06 2025 4 5 April Apr 6 Sunday Sun Apr 06 05:00:00 1970-01-01 05:00:00
2025-04-06 06:00:00 58.7 0 0.000 0.000 0 0 0 0 174868.8 3.26 105.945465 34.0522 -118.2437 2-4 E 105 90 Clearest (>30 km) 2025-04-06 2025 4 6 April Apr 6 Sunday Sun Apr 06 06:00:00 1970-01-01 06:00:00
2025-04-06 07:00:00 56.9 0 0.000 0.000 0 0 0 0 160433.1 2.47 84.805664 34.0522 -118.2437 2-4 E 75 90 Clearest (>30 km) 2025-04-06 2025 4 7 April Apr 6 Sunday Sun Apr 06 07:00:00 1970-01-01 07:00:00
2025-04-06 08:00:00 54.9 0 0.000 0.000 0 0 0 0 137139.1 4.70 90.000000 34.0522 -118.2437 4-6 E 75 90 Clearest (>30 km) 2025-04-06 2025 4 8 April Apr 6 Sunday Sun Apr 06 08:00:00 1970-01-01 08:00:00
2025-04-06 09:00:00 54.8 0 0.000 0.000 0 0 0 0 156168.0 4.79 52.594578 34.0522 -118.2437 4-6 NE 45 45 Clearest (>30 km) 2025-04-06 2025 4 9 April Apr 6 Sunday Sun Apr 06 09:00:00 1970-01-01 09:00:00
2025-04-06 10:00:00 54.8 0 0.000 0.000 0 0 0 0 168963.3 5.52 68.629311 34.0522 -118.2437 4-6 E 60 90 Clearest (>30 km) 2025-04-06 2025 4 10 April Apr 6 Sunday Sun Apr 06 10:00:00 1970-01-01 10:00:00
2025-04-06 11:00:00 52.5 0 0.000 0.000 0 0 0 0 137467.2 6.20 64.359047 34.0522 -118.2437 6-8 NE 60 45 Clearest (>30 km) 2025-04-06 2025 4 11 April Apr 6 Sunday Sun Apr 06 11:00:00 1970-01-01 11:00:00
2025-04-06 12:00:00 52.4 0 0.000 0.000 0 0 0 0 146325.5 6.30 62.525658 34.0522 -118.2437 6-8 NE 60 45 Clearest (>30 km) 2025-04-06 2025 4 12 April Apr 6 Sunday Sun Apr 06 12:00:00 1970-01-01 12:00:00
2025-04-06 13:00:00 52.4 0 0.000 0.000 0 0 0 0 159448.8 6.41 60.751270 34.0522 -118.2437 6-8 NE 60 45 Clearest (>30 km) 2025-04-06 2025 4 13 April Apr 6 Sunday Sun Apr 06 13:00:00 1970-01-01 13:00:00
2025-04-06 14:00:00 53.3 0 0.000 0.000 0 0 0 0 178149.6 6.76 55.784248 34.0522 -118.2437 6-8 NE 45 45 Clearest (>30 km) 2025-04-06 2025 4 14 April Apr 6 Sunday Sun Apr 06 14:00:00 1970-01-01 14:00:00
2025-04-06 15:00:00 58.6 0 0.000 0.000 0 0 0 0 176509.2 4.53 57.094753 34.0522 -118.2437 4-6 NE 45 45 Clearest (>30 km) 2025-04-06 2025 4 15 April Apr 6 Sunday Sun Apr 06 15:00:00 1970-01-01 15:00:00
2025-04-06 16:00:00 66.4 0 0.000 0.000 0 0 0 0 208989.5 4.08 99.462250 34.0522 -118.2437 4-6 E 90 90 Clearest (>30 km) 2025-04-06 2025 4 16 April Apr 6 Sunday Sun Apr 06 16:00:00 1970-01-01 16:00:00
2025-04-06 17:00:00 71.4 0 0.000 0.000 0 0 0 0 227362.2 3.33 132.273621 34.0522 -118.2437 2-4 SE 120 135 Clearest (>30 km) 2025-04-06 2025 4 17 April Apr 6 Sunday Sun Apr 06 17:00:00 1970-01-01 17:00:00
2025-04-06 18:00:00 76.7 0 0.000 0.000 0 0 0 0 268372.7 2.69 175.236450 34.0522 -118.2437 2-4 S 165 180 Clearest (>30 km) 2025-04-06 2025 4 18 April Apr 6 Sunday Sun Apr 06 18:00:00 1970-01-01 18:00:00
2025-04-06 19:00:00 80.4 0 0.000 0.000 0 0 0 0 295275.6 2.86 218.659836 34.0522 -118.2437 2-4 SW 210 225 Clearest (>30 km) 2025-04-06 2025 4 19 April Apr 6 Sunday Sun Apr 06 19:00:00 1970-01-01 19:00:00
2025-04-06 20:00:00 83.4 0 0.000 0.000 0 0 0 0 295275.6 5.52 248.629303 34.0522 -118.2437 4-6 W 240 270 Clearest (>30 km) 2025-04-06 2025 4 20 April Apr 6 Sunday Sun Apr 06 20:00:00 1970-01-01 20:00:00
2025-04-06 21:00:00 81.7 0 0.000 0.000 0 0 0 0 284120.8 11.11 260.727478 34.0522 -118.2437 10+ W 255 270 Clearest (>30 km) 2025-04-06 2025 4 21 April Apr 6 Sunday Sun Apr 06 21:00:00 1970-01-01 21:00:00
2025-04-06 22:00:00 79.1 0 0.000 0.000 0 0 0 0 272309.7 11.81 260.180756 34.0522 -118.2437 10+ W 255 270 Clearest (>30 km) 2025-04-06 2025 4 22 April Apr 6 Sunday Sun Apr 06 22:00:00 1970-01-01 22:00:00
2025-04-06 23:00:00 77.6 0 0.000 0.000 0 0 0 0 270669.3 12.29 259.508575 34.0522 -118.2437 10+ W 255 270 Clearest (>30 km) 2025-04-06 2025 4 23 April Apr 6 Sunday Sun Apr 06 23:00:00 1970-01-01 23:00:00
2025-04-07 75.0 0 0.000 0.000 0 0 0 0 248031.5 11.81 260.180756 34.0522 -118.2437 10+ W 255 270 Clearest (>30 km) 2025-04-07 2025 4 0 April Apr 7 Monday Mon Apr 07 00:00:00 1970-01-01
2025-04-07 01:00:00 70.3 0 0.000 0.000 0 0 0 0 193569.6 11.07 261.869995 34.0522 -118.2437 10+ W 255 270 Clearest (>30 km) 2025-04-07 2025 4 1 April Apr 7 Monday Mon Apr 07 01:00:00 1970-01-01 01:00:00
2025-04-07 02:00:00 65.5 0 0.000 0.000 0 0 0 0 147309.7 7.84 266.729584 34.0522 -118.2437 6-8 W 255 270 Clearest (>30 km) 2025-04-07 2025 4 2 April Apr 7 Monday Mon Apr 07 02:00:00 1970-01-01 02:00:00
2025-04-07 03:00:00 61.3 0 0.000 0.000 0 0 0 0 110892.4 3.91 256.759460 34.0522 -118.2437 2-4 W 255 270 Clearest (>30 km) 2025-04-07 2025 4 3 April Apr 7 Monday Mon Apr 07 03:00:00 1970-01-01 03:00:00
source:
1 Date of the recorded data., 2 Temperature at 2 meters above ground., 3 Probability of precipitation., 4 Amount of precipitation., 5 Amount of rain., 6 Amount of showers., 7 Amount of snowfall., 8 Depth of snow., 9 Code representing the weather condition., 10 Visibility distance., 11 Wind speed at 10 meters above ground., 12 Wind direction at 10 meters above ground., 13 Vertical location coordinate., 14 Horizontal location coordinate., 15 Binned categories for wind speed., 16 Cardinal direction of the wind., 17 Binned categories for wind direction., 18 Numeric angle representing wind direction., 19 Categorized visibility levels., 20 Date without time, 21 Year extracted from the date., 22 Month extracted from the date., 23 Hour extracted from the date., 24 Name of the month., 25 Abbreviated name of the month., 26 Day extracted from the date., 27 Name of the weekday., 28 Abbreviated name of the weekday., 29 Combined month and day., 30 Time extracted from the date., 31 Common date format for time-based analysis.
Replace the forecast_data table; optionally, create an output preview object.
-- Replace the historical weather table
CREATE OR REPLACE TABLE forecast_data AS
SELECT * FROM transformed_forecast;

-- Preview results 
SELECT * FROM forecast_data LIMIT 10;
Code
DROP VIEW transformed_forecast;
Code
VACUUM forecast_data;

Dataset: Historical, 1974-2024

#| label: "preValidationHistorical"
#| code-summary: "Pre-Transformation Validation (Raw Data)"
#| code-fold: true

# Using existing DuckDB connection
raw_data <- tbl(duckdb_con, "historical_data")

# Create validation agent for raw data
raw_agent <- create_agent(tbl = raw_data,
                          actions = action_levels(warn_at = 0.01, stop_at = 0.05)) |>
     # Core structure validation
     col_exists(
          vars(
               date,
               temperature_2m,
               precipitation,
               rain,
               snowfall,
               snow_depth,
               weather_code,
               wind_speed,
               wind_direction
          )
     ) |>
     col_is_date(vars(date)) |>
     # Value range checks
     col_vals_between(vars(temperature_2m), -50, 130, na_pass = TRUE) |>
     col_vals_gte(vars(precipitation), 0, na_pass = TRUE) |>
     
     # Add valid codes
    # col_vals_in_set(vars(weather_code), set = codes) |> 
     
     col_vals_between(vars(wind_direction_10m), 0, 360, na_pass = TRUE) |>
     
     interrogate()
#| label: "outputRawAgent"
#| echo: false
raw_agent 
Modular SQL, in-database transformation
-- Create or replace the view with modular CTEs and explicit column lists
CREATE OR REPLACE VIEW transformed_historical AS
WITH cleaned_data AS (
SELECT
     date::TIMESTAMP AS date,
     ROUND(temperature_2m::FLOAT, 1) AS temperature_2m,
     ROUND(precipitation::FLOAT, 3) AS precipitation,
     ROUND(rain::FLOAT, 3) AS rain,
     ROUND(snowfall::FLOAT, 3) AS snowfall,
     ROUND(snow_depth::FLOAT, 3) AS snow_depth,
     weather_code AS weather_code,
     ROUND(wind_speed_10m::FLOAT, 2) AS wind_speed_10m,
     wind_direction_10m AS wind_direction_10m,
     latitude AS latitude,
     longitude AS longitude
FROM historical_data
),

transformed_data AS (
SELECT
     *,
-- Speed bin
CASE 
WHEN wind_speed_10m <= 2 THEN CAST('0-2' AS speed_bin_enum)
WHEN wind_speed_10m <= 4 THEN CAST('2-4' AS speed_bin_enum)
WHEN wind_speed_10m <= 6 THEN CAST('4-6' AS speed_bin_enum)
WHEN wind_speed_10m <= 8 THEN CAST('6-8' AS speed_bin_enum)
WHEN wind_speed_10m <= 10 THEN CAST('8-10' AS speed_bin_enum)
ELSE CAST('10+' AS speed_bin_enum)
END AS speed_bin,
-- Cardinal direction
CASE 
WHEN wind_direction_10m BETWEEN 0 AND 22.5 THEN CAST('N' AS cardinal_direction_enum)
WHEN wind_direction_10m BETWEEN 22.5 AND 67.5 THEN CAST('NE' AS cardinal_direction_enum)
WHEN wind_direction_10m BETWEEN 67.5 AND 112.5 THEN CAST('E' AS cardinal_direction_enum)
WHEN wind_direction_10m BETWEEN 112.5 AND 157.5 THEN CAST('SE' AS cardinal_direction_enum)
WHEN wind_direction_10m BETWEEN 157.5 AND 202.5 THEN CAST('S' AS cardinal_direction_enum)
WHEN wind_direction_10m BETWEEN 202.5 AND 247.5 THEN CAST('SW' AS cardinal_direction_enum)
WHEN wind_direction_10m BETWEEN 247.5 AND 292.5 THEN CAST('W' AS cardinal_direction_enum)
WHEN wind_direction_10m BETWEEN 292.5 AND 337.5 THEN CAST('NW' AS cardinal_direction_enum)
WHEN wind_direction_10m BETWEEN 337.5 AND 360 THEN CAST('N' AS cardinal_direction_enum)
ELSE NULL
END AS wind_direction_cardinal,
-- 15-degree direction bin (numeric)
FLOOR((wind_direction_10m - 1e-9) / 15) * 15 AS direction_bin
FROM cleaned_data
),

final_data AS (
SELECT
     *,
     -- Direction angle
     CASE
          WHEN wind_direction_cardinal = 'N' THEN 0
          WHEN wind_direction_cardinal = 'NE' THEN 45
          WHEN wind_direction_cardinal = 'E' THEN 90
          WHEN wind_direction_cardinal = 'SE' THEN 135
          WHEN wind_direction_cardinal = 'S' THEN 180
          WHEN wind_direction_cardinal = 'SW' THEN 225
          WHEN wind_direction_cardinal = 'W' THEN 270
          WHEN wind_direction_cardinal = 'NW' THEN 315
     ELSE NULL
     END AS direction_angle,
-- Date parts
strftime(date, '%m-%d-%Y') AS date_only,
EXTRACT(YEAR FROM date) AS year,
EXTRACT(MONTH FROM date) AS month,
EXTRACT(hour FROM date) AS hour,
monthname(date)::month_name_enum AS month_name,
strftime(date, '%b')::month_abb_enum AS month_abb,
EXTRACT(DAY FROM date) AS day,
dayname(date)::weekday_name_enum AS weekday_name,
strftime(date, '%a')::weekday_abb_enum AS weekday_abb,
strftime(date, '%b %d') AS month_day,
strftime(date, '%H:%M:%S') AS time_only,
strptime('1970-01-01 ' || strftime(date, '%H:%M:%S'), '%Y-%m-%d %H:%M:%S') AS common_date
FROM transformed_data
)

-- Final output
SELECT * FROM final_data;
#| label: "postTransformHistorical"
#| code-summary: "Post-Transformation Validation"
#| code-fold: true

transformed_data <- tbl(duckdb_con, "transformed_historical")

trans_agent <- create_agent(
     tbl = transformed_data, label = "Post-Transformed Validation", actions = action_levels(warn_at = 0.01, stop_at = 0.05)
) |>
     # Validate enum mappings
     col_is_factor(vars(weather_code, speed_bin, wind_direction_cardinal)) |>
     
     # Validate temperature decimal places (simpler arithmetic check)
     col_vals_expr(
          expr = ~ MOD(temperature_2m * 10, 1) == 0,
          label = "Temperature has 1 decimal place",
          #na_pass = TRUE  # Skip NA values automatically
     ) |>

     # Validate speed bin logic
     col_vals_in_set(vars(speed_bin), set = c("0-2", "2-4", "4-6", "6-8", "8-10", "10+")) |>
     
     # Date validations
     col_vals_between(vars(year), 1974, 2024) |>
     col_vals_between(vars(month), 1, 12) |>
     col_vals_between(vars(day), 1, 31) |>
     
     # Month/weekday validations
     col_vals_in_set(vars(month_name), set = month.name) |>
     col_vals_in_set(vars(month_abb), set = month.abb) |>
     col_vals_in_set(
          vars(weekday_name),
          set = c(
               "Sunday",
               "Monday",
               "Tuesday",
               "Wednesday",
               "Thursday",
               "Friday",
               "Saturday"
          )
     ) |>
     col_vals_in_set(vars(weekday_abb),
                     set = c("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat")) |>
     
     # Alternative month_day validation
     col_vals_expr(
          expr = ~ month_day == sql("STRFTIME(date, '%b %d')"),
          label = "Month/day format matches date"
     ) |>
     
     # Alternative time_only validation
     col_vals_expr(
          expr = ~ time_only == sql("STRFTIME(date, '%H:%M:%S')"),
          label = "Time format matches date"
     ) |>

     # Common date validation
     col_is_date(vars(common_date)) |>
     col_vals_between(
          vars(common_date),
          left = as.POSIXct("1970-01-01 00:00:00"),
          right = as.POSIXct("1970-01-01 23:59:59")
     ) |>
     
     # Wind direction validations
     col_vals_between(vars(direction_angle), 0, 315, na_pass = TRUE) |>
     col_vals_in_set(vars(wind_direction_cardinal),
                     set = c("N", "NE", "E", "SE", "S", "SW", "W", "NW")) |>
     
     # Cross-validations using SQL expressions
     col_vals_expr(
          expr = ~ time_only == sql("STRFTIME(common_date, '%H:%M:%S')"),
          label = "Time matches common_date"
     ) |>
     
     col_vals_expr(
          expr = ~ month_name == sql(
               "CASE month
                WHEN 1 THEN 'January' WHEN 2 THEN 'February'
                WHEN 3 THEN 'March' WHEN 4 THEN 'April'
                WHEN 5 THEN 'May' WHEN 6 THEN 'June'
                WHEN 7 THEN 'July' WHEN 8 THEN 'August'
                WHEN 9 THEN 'September' WHEN 10 THEN 'October'
                WHEN 11 THEN 'November' WHEN 12 THEN 'December' END"
          ),
          label = "Month name matches month number"
     ) |>
     
     col_vals_expr(expr = ~ weekday_abb == sql("SUBSTR(weekday_name, 1, 3)"),
                   label = "Weekday abbreviation matches name") |>
     
     interrogate()
#| label: "transAgent"
#| echo: false
trans_agent
Code
-- Final output
SELECT * FROM transformed_historical LIMIT 20;
table setup
r_df <- viewOfHistorical |>
dplyr::mutate(
     date = as.character(date),
     common_date = as.character(common_date)
)

locations_list = colnames(r_df)

notes_list <- c(
     "Date of the recorded data.",
     "Temperature at 2 meters above ground.",
     "Amount of precipitation.",
     "Amount of rain.",
     "Amount of snowfall.",
     "Depth of snow.",
     "Code representing the weather condition.",
     "Wind speed at 10 meters above ground.",
     "Wind direction at 10 meters above ground.",
     "Vertical location coordinate.",
     "Horizontal location coordinate.",
     "Cardinal direction of the wind.",
     "Binned categories for wind speed.",
     "Binned categories for direction angle.",
     "Numeric angle representing wind direction.",
     "Date without time",
     "Year extracted from the date.",
     "Month extracted from the date.",
     "Hour extracted from the date.",
     "Name of the month.",
     "Abbreviated name of the month.",
     "Day extracted from the date.",
     "Name of the weekday.",
     "Abbreviated name of the weekday.",
     "Combined month and day.",
     "Time extracted from the date.",
     "Common date format for time-based analysis."
)

footnotes_df <- tibble(
  notes = notes_list, 
  locations = locations_list
)

pal_df <- tibble(
  cols = locations_list,
  pals = list(eval_palette("grDevices::Rocket", 10 , 'c', 1))
)

rTable <- r_table_theming(
r_df,
title = "Historical Data Preview",
subtitle = NULL,
footnotes_df,
source_note = md("**source**: "),
pal_df,
footnotes_multiline = FALSE,
table_font_size = pct(70),
#do_col_labels = TRUE,
)
Table 5
Historical Data Preview
date1 temperature_2m2 precipitation3 rain4 snowfall5 snow_depth6 weather_code7 wind_speed_10m8 wind_direction_10m9 latitude10 longitude11 speed_bin12 wind_direction_cardinal13 direction_bin14 direction_angle15 date_only16 year17 month18 hour19 month_name20 month_abb21 day22 weekday_name23 weekday_abb24 month_day25 time_only26 common_date27
1974-01-01 05:00:00 50.2 0 0 0 0 3 3.39 97.59455 33.4484 -112.074 2-4 E 90 90 01-01-1974 1974 1 5 January Jan 1 Tuesday Tue Jan 01 05:00:00 1970-01-01 05:00:00
1974-01-01 06:00:00 48.1 0 0 0 0 2 4.03 93.17977 33.4484 -112.074 4-6 E 90 90 01-01-1974 1974 1 6 January Jan 1 Tuesday Tue Jan 01 06:00:00 1970-01-01 06:00:00
1974-01-01 07:00:00 45.5 0 0 0 0 1 4.92 90.00000 33.4484 -112.074 4-6 E 75 90 01-01-1974 1974 1 7 January Jan 1 Tuesday Tue Jan 01 07:00:00 1970-01-01 07:00:00
1974-01-01 08:00:00 44.2 0 0 0 0 2 4.72 84.55976 33.4484 -112.074 4-6 E 75 90 01-01-1974 1974 1 8 January Jan 1 Tuesday Tue Jan 01 08:00:00 1970-01-01 08:00:00
1974-01-01 09:00:00 43.2 0 0 0 0 3 4.56 78.69010 33.4484 -112.074 4-6 E 75 90 01-01-1974 1974 1 9 January Jan 1 Tuesday Tue Jan 01 09:00:00 1970-01-01 09:00:00
1974-01-01 10:00:00 42.5 0 0 0 0 2 4.72 84.55976 33.4484 -112.074 4-6 E 75 90 01-01-1974 1974 1 10 January Jan 1 Tuesday Tue Jan 01 10:00:00 1970-01-01 10:00:00
1974-01-01 11:00:00 41.9 0 0 0 0 1 4.97 82.23492 33.4484 -112.074 4-6 E 75 90 01-01-1974 1974 1 11 January Jan 1 Tuesday Tue Jan 01 11:00:00 1970-01-01 11:00:00
1974-01-01 12:00:00 41.6 0 0 0 0 0 5.05 102.80426 33.4484 -112.074 4-6 E 90 90 01-01-1974 1974 1 12 January Jan 1 Tuesday Tue Jan 01 12:00:00 1970-01-01 12:00:00
1974-01-01 13:00:00 41.1 0 0 0 0 0 5.00 116.56499 33.4484 -112.074 4-6 SE 105 135 01-01-1974 1974 1 13 January Jan 1 Tuesday Tue Jan 01 13:00:00 1970-01-01 13:00:00
1974-01-01 14:00:00 41.0 0 0 0 0 0 6.36 129.28938 33.4484 -112.074 6-8 SE 120 135 01-01-1974 1974 1 14 January Jan 1 Tuesday Tue Jan 01 14:00:00 1970-01-01 14:00:00
1974-01-01 15:00:00 41.3 0 0 0 0 1 6.67 129.55963 33.4484 -112.074 6-8 SE 120 135 01-01-1974 1974 1 15 January Jan 1 Tuesday Tue Jan 01 15:00:00 1970-01-01 15:00:00
1974-01-01 16:00:00 45.0 0 0 0 0 1 9.03 131.98714 33.4484 -112.074 8-10 SE 120 135 01-01-1974 1974 1 16 January Jan 1 Tuesday Tue Jan 01 16:00:00 1970-01-01 16:00:00
1974-01-01 17:00:00 53.6 0 0 0 0 1 8.63 148.78166 33.4484 -112.074 8-10 SE 135 135 01-01-1974 1974 1 17 January Jan 1 Tuesday Tue Jan 01 17:00:00 1970-01-01 17:00:00
1974-01-01 18:00:00 59.6 0 0 0 0 1 8.75 175.60138 33.4484 -112.074 8-10 S 165 180 01-01-1974 1974 1 18 January Jan 1 Tuesday Tue Jan 01 18:00:00 1970-01-01 18:00:00
1974-01-01 19:00:00 64.2 0 0 0 0 3 12.85 211.47679 33.4484 -112.074 10+ SW 210 225 01-01-1974 1974 1 19 January Jan 1 Tuesday Tue Jan 01 19:00:00 1970-01-01 19:00:00
1974-01-01 20:00:00 65.7 0 0 0 0 3 22.42 233.93050 33.4484 -112.074 10+ SW 225 225 01-01-1974 1974 1 20 January Jan 1 Tuesday Tue Jan 01 20:00:00 1970-01-01 20:00:00
1974-01-01 21:00:00 65.9 0 0 0 0 3 25.00 236.30991 33.4484 -112.074 10+ SW 225 225 01-01-1974 1974 1 21 January Jan 1 Tuesday Tue Jan 01 21:00:00 1970-01-01 21:00:00
1974-01-01 22:00:00 64.9 0 0 0 0 3 21.06 247.52052 33.4484 -112.074 10+ W 240 270 01-01-1974 1974 1 22 January Jan 1 Tuesday Tue Jan 01 22:00:00 1970-01-01 22:00:00
1974-01-01 23:00:00 63.3 0 0 0 0 3 19.52 265.39999 33.4484 -112.074 10+ W 255 270 01-01-1974 1974 1 23 January Jan 1 Tuesday Tue Jan 01 23:00:00 1970-01-01 23:00:00
1974-01-02 60.1 0 0 0 0 3 16.48 277.80008 33.4484 -112.074 10+ W 270 270 01-02-1974 1974 1 0 January Jan 2 Wednesday Wed Jan 02 00:00:00 1970-01-01
source:
1 Date of the recorded data., 2 Temperature at 2 meters above ground., 3 Amount of precipitation., 4 Amount of rain., 5 Amount of snowfall., 6 Depth of snow., 7 Code representing the weather condition., 8 Wind speed at 10 meters above ground., 9 Wind direction at 10 meters above ground., 10 Vertical location coordinate., 11 Horizontal location coordinate., 12 Cardinal direction of the wind., 13 Binned categories for wind speed., 14 Binned categories for direction angle., 15 Numeric angle representing wind direction., 16 Date without time, 17 Year extracted from the date., 18 Month extracted from the date., 19 Hour extracted from the date., 20 Name of the month., 21 Abbreviated name of the month., 22 Day extracted from the date., 23 Name of the weekday., 24 Abbreviated name of the weekday., 25 Combined month and day., 26 Time extracted from the date., 27 Common date format for time-based analysis.
Replace the historical weather table
CREATE OR REPLACE TABLE historical_data AS
SELECT * FROM transformed_historical;
Drop the view
DROP VIEW transformed_historical;
Refresh database statistics for the query planner
VACUUM historical_data;

Weather Code Stats

Forecast

Install and load DuckDB’s spatial library.
INSTALL spatial; LOAD spatial;
Contextualize weather data by linking codes to actionable insights using in-database processing.
WITH routes AS (
  SELECT * FROM (
    VALUES
    (101, 38.748, -90.439, 40.7128, -74.0060, 280),
    (102, 40.7128, -74.0060, 34.0522, -118.2437, 220)
  ) AS t(route_id, start_lat, start_lon, end_lat, end_lon, distance_miles)
),

route_geometries AS (
  SELECT
    route_id,
    distance_miles,
    ST_MakeLine(
      ST_Point(start_lon, start_lat),
      ST_Point(end_lon, end_lat)
    ) AS route_line
  FROM routes
),

enhanced_forecast AS (
  SELECT
    *,
    ST_Point(longitude, latitude) AS forecast_point
  FROM forecast_data
),

route_weather_join AS (
  SELECT
    rg.route_id,
    rg.distance_miles,
    wc.*,
    ST_Distance(rg.route_line, ef.forecast_point) AS distance_from_route
  FROM route_geometries rg
  JOIN enhanced_forecast ef
    ON ST_Intersects(ST_Buffer(rg.route_line, 0.1), ef.forecast_point)
  JOIN weather_codes wc USING (weather_code)
)

SELECT
  route_id,
  MAX(severity) AS max_severity,
  SUM(risk_score) AS total_risk,
  AVG(fuel_multiplier) * distance_miles AS projected_fuel,
  SUM(risk_score * distance_miles / (60 * 10)) AS total_delay,
  COUNT(*) AS weather_points_impacted
FROM route_weather_join
GROUP BY route_id, distance_miles
ORDER BY route_id ASC;

SUM(route_delay_factor * distance_miles / 60) + SUM(border_delay_hours) AS total_delay,

table setup
locations_list = colnames(exampleOutput)

notes_list <- c(
  "Unique identifier for the transportation route",
  "Highest severity level of weather impacts along the route (Low/Moderate/High/Critical)",
  "Sum of all risk scores from weather events affecting the route",
  "Estimated total fuel consumption adjusted for weather multipliers",
  "Cumulative delay time (hours) due to weather-related speed reductions",
  "Number of geographic points along the route affected by adverse weather"
)

footnotes_df <- tibble(
  notes = notes_list, 
  locations = locations_list)

pal_df <- tibble(
  cols = locations_list
#  pals = list(eval_palette("viridis::viridis", 2, 'c', 1))
)

rTable <- r_table_theming(
exampleOutput,
title = "Experimental Route Attributes",
subtitle = NULL,
footnotes_df,
source_note = md("**source**: "),
pal_df,
multiline_feet = TRUE,
table_font_size = pct(95),
target_everything = TRUE,
#row_name_col = "route_id",
)
Table 6
Experimental Route Attributes
route_id1 max_severity2 total_risk3 projected_fuel4 total_delay5 weather_points_impacted6
101 Moderate 16.25 296.66 7.583333 60
102 Moderate 11.60 227.37 4.253333 60
source:
1 Unique identifier for the transportation route
2 Highest severity level of weather impacts along the route (Low/Moderate/High/Critical)
3 Sum of all risk scores from weather events affecting the route
4 Estimated total fuel consumption adjusted for weather multipliers
5 Cumulative delay time (hours) due to weather-related speed reductions
6 Number of geographic points along the route affected by adverse weather

Spatial Representation

Convert coordinates into geometric objects:

\(\color{yellow}{Routes\to LineStrings}\)

\(\color{yellow}{Forecast\ Points\to Points}\) \(\color{gray}{\text{where:}}\) \(\color{yellow}{LineString\small_R\normalsize=ST\_MakeLine(ST\_Point(start),\ ST\_Point(end))}\) \(\color{yellow}{Point(F)=ST\_Point(longitude,\ latitude)}\)

\({\textbf{R}}\) - Route definition (tuple of start/end coordinates)

\({\textbf{LineString(R)}}\) - Linear geometry connecting route endpoints, generated by: \(\color{gray}{ST\_MakeLine(ST\_Point(start_{Lon},\ start_{Lat}),\ ST\_Point(end_{Lon},\ end_{Lat}))}\)

\({\textbf{F}}\) - Raw forecast data point (from API)

\({\textbf{Point(F)}}\) - Geometric point representing weather observation, generated by: \(\color{gray}{\ ST\_Point(longitude,\ latitude))}\)

\({\textbf{start, end}}\) - Route endpoints (latitude/longitude pairs)

\({\textbf{ST\_Point}}\) - DuckDB function converting coordinates to points

\({\textbf{ST\_MakeLine}}\) - DuckDB function creating route lines

Risk Aggregation

Summarize impacts per route:

\(\color{yellow}{Total\ Risk_R=\sum{risk\_score}^{}}\)

\(\color{yellow}{Max\ Severity\small_R\normalsize=max(severity)}\)

\(\color{yellow}{Fuel\ Impact_R=distance\ \times \ \overline{fuel\_multiplier}}\)

\(\color{yellow}{Delay\small_R\normalsize=\left(\frac{distance}{60}\ \times\ route\_delay\right)\ +\ border\_delays}\)

\({\textbf{R}}\) - Route definition (tuple of start/end coordinates)

Spatial Filtering

Identify weather impacts along routes:

\(\color{yellow}{Impacted\ Points=\{F\ |\ ST\_Intersects(ST\_Buffer(LineString(R),\ Point(F))\}}\)

\({\textbf{F}}\) - Weather forecast points

\({\textbf{R}}\) - Route geometry

\({\textbf{ST\_Buffer}}\) - Expands route line by 0.1 degr. (~11 km at equator)

Simplified Pipeline

\[ \color{yellow}{Raw\ Forecast\overset{Spatialize}{\longrightarrow}Points\overset{Intersect\ Routes}{\longrightarrow}Filtered\ Data\overset{Aggregate}{\longrightarrow}Risk\ Metrics} \]

The workflow transforms raw coordinates into actionable route risk profiles using spatial relationships and weighted averages.

Historical EDA

Parameterized SQL Aggregation Function Examples

Full parameterization using a glue_sql template
glue_sql_mean <- function(con,
                     group_cols,
                     transformation_col,
                     metric_col,
                     from_tbl) {
     # Create parameterized query with glue_sql
     query <- glue::glue_sql("
     SELECT
          {`group_cols`*}
          ,AVG({`transformation_col`}) AS {`metric_col`}
     FROM {`from_tbl`}
     GROUP BY {`group_cols`*}
     ORDER BY {`group_cols`*}
     ", .con = con)
     return(dbGetQuery(con, query))
}

glue_sql_sum <- function(con,
                     group_cols,
                     transformation_col,
                     metric_col,
                     from_tbl) {
     # Create parameterized query with glue_sql
     query <- glue::glue_sql("
     SELECT
          {`group_cols`*}
          ,SUM({`transformation_col`}) AS {`metric_col`}
     FROM {`from_tbl`}
     GROUP BY {`group_cols`*}
     ORDER BY {`group_cols`*}
     ", .con = con)
     return(dbGetQuery(con, query))
}

glue_sql_count <- function(con,
                     group_cols,
                     transformation_col,
                     metric_col,
                     from_tbl) {
     # Create parameterized query with glue_sql
     query <- glue::glue_sql("
     SELECT
          {`group_cols`*}
          ,COUNT({`transformation_col`}) AS {`metric_col`}
     FROM {`from_tbl`}
     GROUP BY {`group_cols`*}
     ORDER BY {`group_cols`*}
     ", .con = con)
     return(dbGetQuery(con, query))
}
Code
# Define parameters
group_cols <- c("year", "month")
transformation_col <- "temperature_2m"
metric_col <- "avg_temp"
from_tbl <- "historical_data"

mean_data <- glue_sql_mean(
     duckdb_con, 
     group_cols, 
     transformation_col, 
     metric_col, 
     from_tbl
     )

# Define parameters
transformation_col <- "rain"
metric_col <- "sum_rain"

sum_data <- glue_sql_sum(
     duckdb_con, 
     group_cols, 
     transformation_col, 
     metric_col, 
     from_tbl
     )

transformation_col <- "weekday_name"
metric_col <- "count_weekdays"
group_cols <- c("year", "month", "weekday_abb")

count_data <- glue_sql_count(
     duckdb_con, 
     group_cols, 
     transformation_col, 
     metric_col, 
     from_tbl
)

Test A Correlation Visual

Code
Registered S3 method overwritten by 'GGally':
  method from   
  +.gg   ggplot2
Code
# Define parameters
group_cols <- c("year", "month")
transformation_col <- "temperature_2m"
metric_col <- "avg_temp"
from_tbl <- "historical_data"

query_data <- glue_sql_mean(
     duckdb_con, 
     group_cols, 
     transformation_col, 
     metric_col, 
     from_tbl
)

pm <- ggpairs(
query_data, 
columns = c("year", "month", "avg_temp"), 
columnLabels = c("Year", "Month", "Mean Temp"),
ggplot2::aes(color = as.factor(month), alpha = 0.5)
)
Code
pm + ggplot_theming()  # Apply custom theme
Figure 1
Code
corrplot 0.95 loaded
Code
# Subset temperature-related numerical variables
temp_vars <- tbl(duckdb_con, "historical_data") |> 
     select(latitude, temperature_2m, year, snowfall, snow_depth) |>
     collect() # |>
     #glimpse()

# Calculate correlation matrix (Pearson)
cor_matrix_temp <- cor(temp_vars, use = "complete.obs", method = "pearson")
Code
cor_matrix_temp
                    latitude temperature_2m          year     snowfall
latitude        1.000000e+00    -0.25852680  3.276914e-07  0.041753557
temperature_2m -2.585268e-01     1.00000000  3.739077e-02 -0.117114197
year            3.276914e-07     0.03739077  1.000000e+00 -0.005086443
snowfall        4.175356e-02    -0.11711420 -5.086443e-03  1.000000000
snow_depth      1.279244e-01    -0.34738754 -3.659271e-02  0.173762207
                snow_depth
latitude        0.12792438
temperature_2m -0.34738754
year           -0.03659271
snowfall        0.17376221
snow_depth      1.00000000
Code
# Visualize with corrplot
corrplot(cor_matrix_temp, method = "number", type = "upper", tl.cex = 0.7)

ANOVA for cateogorical (e.g., weather_code) to continuous data (e.g., temperature, precipitation)

Code
# Example: Weather code vs temperature
temp_weather_code <- tbl(duckdb_con, "historical_data") |> 
     select(temperature_2m, weather_code) |>
     dplyr::collect() 

anova_temp <-aov(temperature_2m ~ weather_code, data = temp_weather_code)

summary(anova_temp)
                  Df    Sum Sq  Mean Sq F value Pr(>F)    
weather_code       1 3.378e+07 33783893   90217 <2e-16 ***
Residuals    8941438 3.348e+09      374                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code
# Boxplot visualization
ggplot(temp_weather_code, aes(x = as.factor(weather_code), y = temperature_2m)) +
     geom_boxplot() +
     labs(title = "Temperature by Weather Code")

figure setup
# Subset precipitation variables
precip_vars <- tbl(duckdb_con, "historical_data") |> 
     select(precipitation, rain, snowfall, snow_depth, weather_code) |>
     dplyr::collect()

# Use Spearman for non-normal distributions
cor_matrix_precip <- cor(precip_vars, use = "complete.obs", method = "spearman")
Code
corrplot(cor_matrix_precip, method = "color", type = "upper")
Figure 2

Forecast Plot Testing

Create a plot list for wind roses
base_path = "data/plots/"

plot_wind_rose_ggplot(duckdb_con)

fileList <-list.files(base_path, pattern = "^wind_rose")
(a) Weather Codes
(b) Freezing/Non-Freezing Temperature
(c) Visibility (km)
(d) Visibility Categories
(e) Precipitation (empty if no precipitation)
(f) Wind Rose1
(g) Wind Rose2
(h) Wind Rose3
Figure 3: These are the grouped figures.

Disconnect From Databases

Dereference memory from the in-memory database connections.
dbDisconnect(duckdb_con)